@djangocfg/ui-tools 2.1.381 → 2.1.383
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -899
- package/dist/ChatRoot-6IZFM5HM.mjs +5 -0
- package/dist/{ChatRoot-EJC5Y2YM.cjs.map → ChatRoot-6IZFM5HM.mjs.map} +1 -1
- package/dist/ChatRoot-LW4XNIKP.cjs +14 -0
- package/dist/{ChatRoot-QOSKJPM6.mjs.map → ChatRoot-LW4XNIKP.cjs.map} +1 -1
- package/dist/DictationField-U25MEYAL.mjs +4 -0
- package/dist/DictationField-U25MEYAL.mjs.map +1 -0
- package/dist/DictationField-XWR5VOID.cjs +13 -0
- package/dist/DictationField-XWR5VOID.cjs.map +1 -0
- package/dist/{DocsLayout-2YKPXZYO.mjs → DocsLayout-2P3ONDWJ.mjs} +3 -3
- package/dist/{DocsLayout-2YKPXZYO.mjs.map → DocsLayout-2P3ONDWJ.mjs.map} +1 -1
- package/dist/{DocsLayout-Q4KS3QWW.cjs → DocsLayout-2YZNS5VK.cjs} +8 -8
- package/dist/{DocsLayout-Q4KS3QWW.cjs.map → DocsLayout-2YZNS5VK.cjs.map} +1 -1
- package/dist/chunk-4PFW7MIJ.cjs +837 -0
- package/dist/chunk-4PFW7MIJ.cjs.map +1 -0
- package/dist/chunk-C2YN6WEO.mjs +833 -0
- package/dist/chunk-C2YN6WEO.mjs.map +1 -0
- package/dist/{chunk-XACCHZH2.cjs → chunk-FIRK5CEH.cjs} +42 -4
- package/dist/chunk-FIRK5CEH.cjs.map +1 -0
- package/dist/{chunk-NWUT327A.mjs → chunk-HIK6BPL7.mjs} +38 -5
- package/dist/chunk-HIK6BPL7.mjs.map +1 -0
- package/dist/chunk-OZAU3QWD.cjs +2493 -0
- package/dist/chunk-OZAU3QWD.cjs.map +1 -0
- package/dist/chunk-UWVP6LCW.mjs +2447 -0
- package/dist/chunk-UWVP6LCW.mjs.map +1 -0
- package/dist/index.cjs +1668 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1215 -107
- package/dist/index.d.ts +1215 -107
- package/dist/index.mjs +1555 -50
- package/dist/index.mjs.map +1 -1
- package/package.json +16 -15
- package/src/audio-assets.d.ts +8 -0
- package/src/components/markdown/MarkdownMessage/CollapseToggle.tsx +3 -1
- package/src/components/markdown/MarkdownMessage/components.tsx +2 -5
- package/src/tools/Chat/README.md +347 -530
- package/src/tools/Chat/components/Attachments.tsx +6 -1
- package/src/tools/Chat/components/ChatRoot.tsx +30 -2
- package/src/tools/Chat/components/Composer.tsx +20 -3
- package/src/tools/Chat/components/ErrorBanner.tsx +7 -3
- package/src/tools/Chat/components/MessageActions.tsx +3 -1
- package/src/tools/Chat/components/MessageBubble.tsx +6 -5
- package/src/tools/Chat/components/MessageList.tsx +87 -1
- package/src/tools/Chat/components/ToolCalls.tsx +21 -3
- package/src/tools/Chat/context/ChatProvider.tsx +21 -3
- package/src/tools/Chat/core/audio/audioBus.ts +10 -163
- package/src/tools/Chat/core/audio/defaults.ts +43 -0
- package/src/tools/Chat/core/audio/index.ts +1 -0
- package/src/tools/Chat/core/audio/preferences.ts +5 -59
- package/src/tools/Chat/core/audio/sounds/error.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/mention.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/notification.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/received.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/sent.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/start.mp3 +0 -0
- package/src/tools/Chat/core/audio/types.ts +28 -0
- package/src/tools/Chat/core/reducer.ts +33 -0
- package/src/tools/Chat/core/transport/index.ts +13 -0
- package/src/tools/Chat/core/transport/mappers/index.ts +6 -0
- package/src/tools/Chat/core/transport/mappers/pydantic-ai.ts +142 -0
- package/src/tools/Chat/core/transport/pydantic-ai-transport.ts +208 -0
- package/src/tools/Chat/core/transport/sse.ts +18 -5
- package/src/tools/Chat/hooks/index.ts +25 -0
- package/src/tools/Chat/hooks/useAutoFocusOnStreamEnd.ts +5 -3
- package/src/tools/Chat/hooks/useChat.ts +28 -0
- package/src/tools/Chat/hooks/useChatAudio.ts +59 -180
- package/src/tools/Chat/hooks/useChatDockPrefs.ts +74 -0
- package/src/tools/Chat/hooks/useChatReset.ts +70 -0
- package/src/tools/Chat/hooks/useChatUnread.ts +87 -0
- package/src/tools/Chat/hooks/useFocusOnEmptyClick.ts +111 -0
- package/src/tools/Chat/hooks/useVisitorFingerprint.ts +48 -0
- package/src/tools/Chat/index.ts +84 -1
- package/src/tools/Chat/launcher/ChatDock.tsx +263 -0
- package/src/tools/Chat/launcher/ChatFAB.tsx +349 -0
- package/src/tools/Chat/launcher/ChatGreeting.tsx +200 -0
- package/src/tools/Chat/launcher/ChatHeader.tsx +76 -0
- package/src/tools/Chat/launcher/ChatHeaderActionButton.tsx +87 -0
- package/src/tools/Chat/launcher/ChatHeaderAudioToggle.tsx +47 -0
- package/src/tools/Chat/launcher/ChatHeaderLanguageButton.tsx +179 -0
- package/src/tools/Chat/launcher/ChatHeaderModeToggle.tsx +57 -0
- package/src/tools/Chat/launcher/ChatHeaderResetButton.tsx +93 -0
- package/src/tools/Chat/launcher/ChatLauncher.tsx +321 -0
- package/src/tools/Chat/launcher/ChatUnreadPreview.tsx +197 -0
- package/src/tools/Chat/launcher/index.ts +46 -0
- package/src/tools/Chat/launcher/useChatPresence.ts +44 -0
- package/src/tools/Chat/styles/bubbleTokens.ts +71 -0
- package/src/tools/Chat/styles/index.ts +16 -0
- package/src/tools/Chat/styles/useChatStyles.ts +101 -0
- package/src/tools/Chat/types/attachment.ts +25 -0
- package/src/tools/Chat/types/config.ts +48 -0
- package/src/tools/Chat/types/events.ts +35 -0
- package/src/tools/Chat/types/index.ts +34 -0
- package/src/tools/Chat/types/labels.ts +38 -0
- package/src/tools/Chat/types/message.ts +32 -0
- package/src/tools/Chat/types/persona.ts +31 -0
- package/src/tools/Chat/types/session.ts +43 -0
- package/src/tools/Chat/types/tool-call.ts +17 -0
- package/src/tools/Chat/types/transport.ts +28 -0
- package/src/tools/Chat/types.ts +5 -240
- package/src/tools/MarkdownEditor/MarkdownEditor.tsx +50 -14
- package/src/tools/MarkdownEditor/index.ts +1 -1
- package/src/tools/SpeechRecognition/README.md +336 -0
- package/src/tools/SpeechRecognition/__tests__/ids.test.ts +15 -0
- package/src/tools/SpeechRecognition/__tests__/language.test.ts +59 -0
- package/src/tools/SpeechRecognition/__tests__/reducer.test.ts +71 -0
- package/src/tools/SpeechRecognition/__tests__/transcript.test.ts +52 -0
- package/src/tools/SpeechRecognition/components/DevicePicker.tsx +49 -0
- package/src/tools/SpeechRecognition/components/DictationButton.tsx +93 -0
- package/src/tools/SpeechRecognition/components/EngineBadge.tsx +30 -0
- package/src/tools/SpeechRecognition/components/ErrorBanner.tsx +52 -0
- package/src/tools/SpeechRecognition/components/LanguagePicker.tsx +63 -0
- package/src/tools/SpeechRecognition/components/MicMeter.tsx +63 -0
- package/src/tools/SpeechRecognition/components/PushToTalkHint.tsx +51 -0
- package/src/tools/SpeechRecognition/components/TranscriptView.tsx +55 -0
- package/src/tools/SpeechRecognition/components/index.ts +16 -0
- package/src/tools/SpeechRecognition/context/SpeechRecognitionProvider.tsx +47 -0
- package/src/tools/SpeechRecognition/context/index.ts +6 -0
- package/src/tools/SpeechRecognition/core/audio/defaults.ts +24 -0
- package/src/tools/SpeechRecognition/core/engine/external.ts +222 -0
- package/src/tools/SpeechRecognition/core/engine/http.ts +147 -0
- package/src/tools/SpeechRecognition/core/engine/index.ts +52 -0
- package/src/tools/SpeechRecognition/core/engine/mediarecorder.ts +105 -0
- package/src/tools/SpeechRecognition/core/engine/websocket.ts +211 -0
- package/src/tools/SpeechRecognition/core/engine/webspeech.ts +188 -0
- package/src/tools/SpeechRecognition/core/ids.ts +11 -0
- package/src/tools/SpeechRecognition/core/index.ts +14 -0
- package/src/tools/SpeechRecognition/core/language.ts +78 -0
- package/src/tools/SpeechRecognition/core/languages-catalog.ts +229 -0
- package/src/tools/SpeechRecognition/core/logger.ts +3 -0
- package/src/tools/SpeechRecognition/core/reducer.ts +105 -0
- package/src/tools/SpeechRecognition/core/transcript.ts +36 -0
- package/src/tools/SpeechRecognition/hooks/index.ts +14 -0
- package/src/tools/SpeechRecognition/hooks/useDictation.ts +59 -0
- package/src/tools/SpeechRecognition/hooks/useEnginePrefs.ts +15 -0
- package/src/tools/SpeechRecognition/hooks/useMicDevices.ts +57 -0
- package/src/tools/SpeechRecognition/hooks/useMicLevel.ts +52 -0
- package/src/tools/SpeechRecognition/hooks/usePushToTalk.ts +85 -0
- package/src/tools/SpeechRecognition/hooks/useResolvedLanguage.ts +28 -0
- package/src/tools/SpeechRecognition/hooks/useSpeechLanguageInfo.ts +108 -0
- package/src/tools/SpeechRecognition/hooks/useSpeechRecognition.ts +188 -0
- package/src/tools/SpeechRecognition/hooks/useVoiceSupport.ts +78 -0
- package/src/tools/SpeechRecognition/index.ts +82 -0
- package/src/tools/SpeechRecognition/lazy.tsx +19 -0
- package/src/tools/SpeechRecognition/store/index.ts +2 -0
- package/src/tools/SpeechRecognition/store/prefsStore.ts +54 -0
- package/src/tools/SpeechRecognition/types.ts +133 -0
- package/src/tools/SpeechRecognition/widgets/DictationField.tsx +105 -0
- package/src/tools/SpeechRecognition/widgets/VoiceComposerSlot.tsx +305 -0
- package/src/tools/SpeechRecognition/widgets/VoiceMessageRecorder.tsx +88 -0
- package/src/tools/SpeechRecognition/widgets/index.ts +6 -0
- package/dist/ChatRoot-EJC5Y2YM.cjs +0 -14
- package/dist/ChatRoot-QOSKJPM6.mjs +0 -5
- package/dist/chunk-NWUT327A.mjs.map +0 -1
- package/dist/chunk-QLMKCSR6.mjs +0 -2420
- package/dist/chunk-QLMKCSR6.mjs.map +0 -1
- package/dist/chunk-SI5RD2GD.cjs +0 -2460
- package/dist/chunk-SI5RD2GD.cjs.map +0 -1
- package/dist/chunk-XACCHZH2.cjs.map +0 -1
- package/src/components/markdown/MarkdownMessage/MarkdownMessage.story.tsx +0 -771
- package/src/stories/index.ts +0 -33
- package/src/tools/AudioPlayer/AudioPlayer.story.tsx +0 -481
- package/src/tools/Chat/Chat.story.tsx +0 -1457
- package/src/tools/CodeEditor/CodeEditor.story.tsx +0 -202
- package/src/tools/CronScheduler/CronScheduler.story.tsx +0 -300
- package/src/tools/Gallery/Gallery.story.tsx +0 -237
- package/src/tools/ImageViewer/ImageViewer.story.tsx +0 -85
- package/src/tools/JsonForm/JsonForm.story.tsx +0 -350
- package/src/tools/JsonTree/JsonTree.story.tsx +0 -141
- package/src/tools/LottiePlayer/LottiePlayer.story.tsx +0 -95
- package/src/tools/Map/Map.story.tsx +0 -458
- package/src/tools/MarkdownEditor/MarkdownEditor.story.tsx +0 -225
- package/src/tools/Mermaid/Mermaid.story.tsx +0 -251
- package/src/tools/OpenapiViewer/OpenapiViewer.story.tsx +0 -230
- package/src/tools/PrettyCode/PrettyCode.story.tsx +0 -304
- package/src/tools/Tour/Tour.story.tsx +0 -279
- package/src/tools/Tree/Tree.story.tsx +0 -620
- package/src/tools/Uploader/Uploader.story.tsx +0 -415
- package/src/tools/VideoPlayer/VideoPlayer.story.tsx +0 -87
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* External-controlled engine — for backends that own the entire
|
|
3
|
+
* record / decode / transcribe pipeline (Wails / Tauri / native
|
|
4
|
+
* sidecar). The frontend exposes "start" and "stop" verbs to the
|
|
5
|
+
* native layer, which then pushes a single `final` (or rolling
|
|
6
|
+
* `partial`s + `final`) back through events.
|
|
7
|
+
*
|
|
8
|
+
* Use this when:
|
|
9
|
+
* - Audio capture lives outside the browser (cmdop_go OS-wide
|
|
10
|
+
* hotkey, system audio device claims, etc.).
|
|
11
|
+
* - Transcription runs on the backend (whisper.cpp, Vosk, custom
|
|
12
|
+
* ONNX) and the browser never sees the raw audio.
|
|
13
|
+
*
|
|
14
|
+
* Compared to `createHttpEngine`/`createWebSocketEngine`:
|
|
15
|
+
* - No `MediaRecorder` / `getUserMedia` involvement.
|
|
16
|
+
* - `isSupported` defaults to `true` (the host knows whether the
|
|
17
|
+
* native side is present — let it gate via `supported`).
|
|
18
|
+
* - No `MediaStream` to expose, so the VU meter falls through to
|
|
19
|
+
* the host's own level event (cmdop wires it via a separate hook).
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { newSegmentId } from '../ids';
|
|
23
|
+
import { sttLogger } from '../logger';
|
|
24
|
+
import { createEngineBus } from './index';
|
|
25
|
+
import type {
|
|
26
|
+
EngineStartOptions,
|
|
27
|
+
RecognitionEngine,
|
|
28
|
+
RecognitionError,
|
|
29
|
+
Unsub,
|
|
30
|
+
} from '../../types';
|
|
31
|
+
|
|
32
|
+
export interface ExternalEngineHandle {
  /**
   * Forward an interim (not yet final) transcript fragment. The engine
   * re-emits it as a `partial` event, generating a segment id on first
   * use and reusing that id for later partials of the same session.
   */
  emitPartial(text: string): void;
  /**
   * Deliver the finished transcript, optionally with a confidence score.
   * The engine emits `final`, then moves to `closed` and ends the session.
   */
  emitFinal(text: string, confidence?: number): void;
  /** Report a backend failure; the engine emits `error` and then `closed`. */
  emitError(err: RecognitionError): void;
  /**
   * Tell the engine that native capture has really begun, switching its
   * status to `listening`. Typically driven by a backend `recording`
   * event. Optional — see `autoMarkListening`.
   */
  markListening(): void;
}
|
|
53
|
+
|
|
54
|
+
export interface ExternalEngineOptions {
  /** Engine identifier used for telemetry / the UI badge. Falls back to `'external'`. */
  id?: string;
  /**
   * The host's view of whether the native side is reachable (e.g. the
   * result of a Wails ping or a Tauri capability check). Treated as
   * `true` when omitted.
   */
  supported?: boolean;
  /** Invoked by `start()` to ask the backend to begin capturing. */
  onStart: (opts: EngineStartOptions) => Promise<void> | void;
  /** Invoked by `stop()` to ask the backend to end capture and finalise its buffer. */
  onStop: () => Promise<void> | void;
  /**
   * Hard-cancel hook used by `abort()`. Where `onStop` may still finalise
   * a transcript, `onAbort` is meant to discard it. When not provided,
   * `onStop` is used instead.
   */
  onAbort?: () => Promise<void> | void;
  /**
   * Hook up the backend event source. Invoked once for every `start()`
   * with a `handle`; the host attaches its native listeners (Wails
   * `EventsOn`, Tauri `appWindow.listen`, …) and pushes transcript
   * fragments through that handle onto the engine bus.
   *
   * The returned function must detach those listeners; the engine calls
   * it when the session is torn down.
   */
  subscribe: (handle: ExternalEngineHandle) => Unsub;
  /**
   * Defaults to `true`: the engine reports `listening` as soon as
   * `onStart` resolves. Pass `false` and call `handle.markListening()`
   * yourself to wait until the native side confirms that the capture
   * session is open.
   */
  autoMarkListening?: boolean;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Wraps a backend-driven STT pipeline into the standard
 * `RecognitionEngine` shape so it works with `useSpeechRecognition`,
 * `VoiceComposerSlot`, and every other piece of the SpeechRecognition
 * tool.
 *
 * Lifecycle as emitted on the engine bus:
 *   `connecting` → (`listening`) → `closing` → `closed`
 * `stop()` only requests finalisation; the session is closed by the
 * backend calling `handle.emitFinal` / `handle.emitError`, or by
 * `abort()`.
 *
 * Calls on the handle that arrive while no session is running are
 * ignored, so late backend events cannot produce a second `closed`.
 *
 * Example (cmdop Wails):
 *
 * ```ts
 * import { EventsOn } from '@runtime';
 * import * as VoiceService from '@bindings/desktop/services/voice/service';
 *
 * const engine = createExternalEngine({
 *   id: 'wails-whisper',
 *   onStart: () => VoiceService.StartRecordingForChat(),
 *   onStop: () => VoiceService.StopRecordingForChat(),
 *   subscribe: (handle) => {
 *     const offText = EventsOn('voice:chat-text', (p) => {
 *       if (p?.error) handle.emitError({ code: 'engine', message: p.error });
 *       else if (p?.text) handle.emitFinal(p.text);
 *       else handle.emitError({ code: 'no-speech', message: '' });
 *     });
 *     const offState = EventsOn('voice:state', (s) => {
 *       if (s.state === 'recording' || s.state === 'streaming') {
 *         handle.markListening();
 *       }
 *       if (s.partial) handle.emitPartial(s.partial);
 *     });
 *     return () => { offText(); offState(); };
 *   },
 * });
 * ```
 *
 * @param opts Host hooks and flags, see `ExternalEngineOptions`.
 * @returns An engine whose `start()` rejects with a `RecognitionError`
 *   (`code: 'engine'`) when `subscribe` or `onStart` fails.
 */
export function createExternalEngine(
  opts: ExternalEngineOptions,
): RecognitionEngine {
  const bus = createEngineBus();
  let currentSegmentId: string | null = null;
  let unsubscribe: Unsub | null = null;
  let detachAbortListener: (() => void) | null = null;
  let running = false;
  // Bumped by every start() so code resuming after an `await` can tell
  // whether it still belongs to the live session.
  let sessionToken = 0;

  /** Detach host listeners and reset all per-session state. */
  function teardown(): void {
    const off = unsubscribe;
    const detach = detachAbortListener;
    unsubscribe = null;
    detachAbortListener = null;
    currentSegmentId = null;
    running = false;
    detach?.();
    try {
      off?.();
    } catch (cause) {
      // A failing host unsubscribe must not leave the engine half-closed.
      sttLogger.warn('[external] unsubscribe threw', cause);
    }
  }

  /** Announce `closed` and release the session. */
  function closeSession(): void {
    bus.emit('state', 'closed');
    teardown();
  }

  /** Hard-cancel the running session (no-op when idle). */
  function abortSession(): void {
    if (!running) return;
    bus.emit('state', 'closing');
    const stopper = opts.onAbort ?? opts.onStop;
    try {
      // The hook may throw synchronously or reject; neither may prevent
      // the session from being closed below.
      Promise.resolve(stopper()).catch((cause) => {
        sttLogger.warn('[external] abort hook threw', cause);
      });
    } catch (cause) {
      sttLogger.warn('[external] abort hook threw', cause);
    }
    closeSession();
  }

  const handle: ExternalEngineHandle = {
    emitPartial(text: string): void {
      if (!running) return;
      if (!currentSegmentId) currentSegmentId = newSegmentId();
      bus.emit('partial', text, currentSegmentId);
    },
    emitFinal(text: string, confidence?: number): void {
      if (!running) return;
      const id = currentSegmentId ?? newSegmentId();
      bus.emit('final', text, id, confidence);
      // External engines almost always go idle right after their
      // final — close the session so consumers' `onStop` fires
      // without requiring a separate `stop()` call.
      closeSession();
    },
    emitError(err: RecognitionError): void {
      // Same guard as the other handle methods: a stale backend error
      // must not emit `error` / `closed` for a session that is gone.
      if (!running) return;
      bus.emit('error', err);
      closeSession();
    },
    markListening(): void {
      if (!running) return;
      bus.emit('state', 'listening');
    },
  };

  return {
    id: opts.id ?? 'external',
    isSupported: opts.supported ?? true,
    on(event, cb): Unsub {
      return bus.on(event, cb);
    },
    async start(start: EngineStartOptions): Promise<void> {
      if (running) return;
      running = true;
      sessionToken += 1;
      const token = sessionToken;
      const isLive = (): boolean => running && token === sessionToken;

      bus.emit('state', 'connecting');
      try {
        // Subscribe before the native side starts so we never miss the
        // first event (some backends emit `recording` synchronously).
        const off = opts.subscribe(handle);
        if (!isLive()) {
          // The host closed the session from inside `subscribe`; the
          // teardown already ran, so detach right away instead of leaking.
          off();
          return;
        }
        unsubscribe = off;
        await opts.onStart(start);
      } catch (cause) {
        const err: RecognitionError = {
          code: 'engine',
          message: 'External engine failed to start.',
          cause,
        };
        // Skip the events if the backend already closed this session
        // through the handle — consumers have seen `closed` once.
        if (isLive()) {
          bus.emit('error', err);
          closeSession();
        }
        throw err;
      }

      // The session may have ended (final / error / abort) while
      // `onStart` was pending; do not revive it with `listening`.
      if (!isLive()) return;

      // NOTE(review): assumes `start.signal` is a standard AbortSignal.
      const signal = start.signal;
      if (signal?.aborted) {
        // Aborted before or during start: the `abort` event already
        // fired, so a listener attached now would never run.
        abortSession();
        return;
      }
      if (signal) {
        const onAbort = (): void => abortSession();
        signal.addEventListener('abort', onAbort, { once: true });
        // Removed on teardown so an old signal cannot cancel a later session.
        detachAbortListener = () => signal.removeEventListener('abort', onAbort);
      }
      if (opts.autoMarkListening !== false) {
        bus.emit('state', 'listening');
      }
    },
    async stop(): Promise<void> {
      if (!running) return;
      bus.emit('state', 'closing');
      try {
        await opts.onStop();
      } catch (cause) {
        sttLogger.warn('[external] onStop threw', cause);
      }
      // Note: we DO NOT flip to `closed` here — most external engines
      // need a roundtrip (transcribe + LLM rewrite) before the final
      // text arrives. `emitFinal` / `emitError` are responsible for
      // closing the session.
    },
    abort(): void {
      abortSession();
    },
  };
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTTP engine — records audio with MediaRecorder and POSTs each chunk to
|
|
3
|
+
* a host-supplied URL. The host owns response parsing via `parse()`, so
|
|
4
|
+
* this engine works with OpenAI Whisper REST, custom Django/FastAPI
|
|
5
|
+
* endpoints, or anything else that takes audio and returns text.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { newSegmentId } from '../ids';
|
|
9
|
+
import { sttLogger } from '../logger';
|
|
10
|
+
import { createEngineBus } from './index';
|
|
11
|
+
import { startMicCapture, type MicCaptureHandle } from './mediarecorder';
|
|
12
|
+
import type {
|
|
13
|
+
EngineStartOptions,
|
|
14
|
+
RecognitionEngine,
|
|
15
|
+
RecognitionError,
|
|
16
|
+
Unsub,
|
|
17
|
+
} from '../../types';
|
|
18
|
+
|
|
19
|
+
/** What a host `parse()` callback hands back for one endpoint response. */
export interface HttpEngineParseResult {
  /** Transcript text; an empty string makes the engine skip the emit. */
  text: string;
  /** `true` emits a `final` event and ends the segment, `false` emits a `partial`. */
  isFinal: boolean;
  /** Confidence reported by the engine, in the range 0..1, if available. */
  confidence?: number;
}
|
|
25
|
+
|
|
26
|
+
export interface HttpEngineOptions {
  /**
   * Target endpoint, either fixed or derived from the session language.
   * Every audio chunk is sent there as the body of a `POST` request.
   */
  url: string | ((language: string) => string);
  /** Header factory, called (and awaited) for every chunk so tokens can be refreshed. */
  headers?: () => Promise<Record<string, string>> | Record<string, string>;
  /** How often a chunk is emitted, in ms. Defaults to 750 — long enough for useful audio. */
  chunkMs?: number;
  /** MIME the encoder should prefer; probed against `MediaRecorder` support. */
  mime?: string;
  /**
   * Turns the endpoint response into a transcript. Returning
   * `null` / `undefined` means "nothing to emit for this chunk".
   */
  parse: (
    resp: Response,
  ) => Promise<HttpEngineParseResult | null | undefined> | HttpEngineParseResult | null | undefined;
  /** Engine identifier used for telemetry / the UI badge. Defaults to `'http'`. */
  id?: string;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Builds a `RecognitionEngine` that captures the microphone through
 * `startMicCapture` and POSTs every recorded chunk to `opts.url`,
 * turning each response into `partial` / `final` events via `opts.parse`.
 *
 * `stop()` and `abort()` both cancel in-flight requests and drop chunks
 * that have not been sent yet; `stop()` additionally waits for the
 * recorder to shut down and announces `closing` first.
 *
 * @param opts Endpoint, header factory, chunking and parsing hooks.
 * @returns The engine; `start()` rejects with the capture error when the
 *   microphone cannot be opened.
 */
export function createHttpEngine(opts: HttpEngineOptions): RecognitionEngine {
  const bus = createEngineBus();
  let capture: MicCaptureHandle | null = null;
  let currentSegmentId: string | null = null;
  let ctrl: AbortController | null = null;
  let stopping = false;
  // True while start() is awaiting the microphone, so a second start()
  // cannot open a parallel capture.
  let starting = false;
  // Bumped by stop()/abort() so a pending start() can notice it was cancelled.
  let generation = 0;
  let detachAbortListener: (() => void) | null = null;

  /** Remove the caller-signal listener installed by the last start(). */
  function releaseAbortListener(): void {
    detachAbortListener?.();
    detachAbortListener = null;
  }

  /** POST one chunk and publish whatever transcript the host parses from the reply. */
  async function sendChunk(blob: Blob, language: string): Promise<void> {
    if (stopping) return;
    // Capture the signal up-front: after an await `ctrl` may already be
    // nulled by stop(), which would otherwise send an un-abortable request.
    const signal = ctrl?.signal;
    try {
      const url = typeof opts.url === 'function' ? opts.url(language) : opts.url;
      // Inside the try so a failing header factory is reported instead
      // of surfacing as an unhandled rejection.
      const headers = (await opts.headers?.()) ?? {};
      const resp = await fetch(url, {
        method: 'POST',
        headers,
        body: blob,
        signal,
      });
      if (signal?.aborted) return;
      if (!resp.ok) {
        bus.emit('error', {
          code: 'network',
          message: `STT endpoint returned ${resp.status}`,
        });
        return;
      }
      const parsed = await opts.parse(resp);
      // The session may have closed while the body was being parsed.
      if (signal?.aborted) return;
      if (!parsed || !parsed.text) return;
      if (!currentSegmentId) currentSegmentId = newSegmentId();
      if (parsed.isFinal) {
        bus.emit('final', parsed.text, currentSegmentId, parsed.confidence);
        currentSegmentId = null;
      } else {
        bus.emit('partial', parsed.text, currentSegmentId);
      }
    } catch (cause) {
      if ((cause as { name?: string })?.name === 'AbortError') return;
      // Any failure after cancellation belongs to a closed session.
      if (signal?.aborted) return;
      sttLogger.warn('[http] chunk send failed', cause);
      bus.emit('error', {
        code: 'network',
        message: 'Failed to deliver audio chunk to STT endpoint.',
        cause,
      });
    }
  }

  const engine: RecognitionEngine = {
    id: opts.id ?? 'http',
    isSupported:
      typeof navigator !== 'undefined' &&
      !!navigator.mediaDevices?.getUserMedia &&
      typeof MediaRecorder !== 'undefined',
    on(event, cb): Unsub {
      return bus.on(event, cb);
    },
    async start(start: EngineStartOptions): Promise<void> {
      if (capture || starting) return;
      starting = true;
      stopping = false;
      const startedAt = generation;
      ctrl = new AbortController();
      bus.emit('state', 'connecting');

      let opened: MicCaptureHandle;
      try {
        opened = await startMicCapture({
          deviceId: start.deviceId,
          mime: opts.mime,
          chunkMs: opts.chunkMs ?? 750,
          onChunk: (chunk) => {
            void sendChunk(chunk, start.language);
          },
          onError: (err) => bus.emit('error', err),
        });
      } catch (cause) {
        starting = false;
        ctrl = null;
        const err = cause as RecognitionError;
        bus.emit('error', err);
        bus.emit('state', 'error');
        throw err;
      }
      starting = false;

      if (startedAt !== generation) {
        // stop()/abort() ran while the microphone was opening and has
        // already announced `closed`; just release the device.
        opened.stop().catch(() => undefined);
        return;
      }

      capture = opened;

      // NOTE(review): assumes `start.signal` is a standard AbortSignal.
      const signal = start.signal;
      if (signal?.aborted) {
        // Aborted before or during start: the `abort` event has already
        // fired, so stop explicitly instead of waiting for it.
        await engine.stop();
        return;
      }
      if (signal) {
        const onAbort = (): void => {
          engine.stop().catch((cause) => {
            sttLogger.warn('[http] stop on abort failed', cause);
          });
        };
        signal.addEventListener('abort', onAbort, { once: true });
        // Removed by stop()/abort() so an old signal cannot end a later session.
        detachAbortListener = () => signal.removeEventListener('abort', onAbort);
      }
      bus.emit('state', 'listening');
    },
    async stop(): Promise<void> {
      stopping = true;
      generation += 1;
      releaseAbortListener();
      bus.emit('state', 'closing');
      ctrl?.abort();
      ctrl = null;
      const active = capture;
      try {
        await active?.stop();
      } finally {
        // Always finish the transition, even when the recorder fails to
        // stop, so consumers are not left in `closing`.
        if (capture === active) capture = null;
        currentSegmentId = null;
        bus.emit('state', 'closed');
      }
    },
    abort(): void {
      stopping = true;
      generation += 1;
      releaseAbortListener();
      ctrl?.abort();
      ctrl = null;
      capture?.stop().catch(() => undefined);
      capture = null;
      currentSegmentId = null;
      bus.emit('state', 'closed');
    },
    getStream(): MediaStream | null {
      return capture?.stream ?? null;
    },
  };

  return engine;
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tiny event-bus helper shared by every engine. Lets engine authors avoid
|
|
3
|
+
* re-implementing add/remove listener bookkeeping while keeping the
|
|
4
|
+
* public `RecognitionEngine.on(...)` contract identical across engines.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { EngineEventMap, Unsub } from '../../types';
|
|
8
|
+
|
|
9
|
+
/** One listener set per engine event name. */
type Listeners = {
  [K in keyof EngineEventMap]: Set<EngineEventMap[K]>;
};

/**
 * Creates an independent event bus for one engine instance.
 *
 * - `on(event, cb)` registers `cb` and returns a function that removes it.
 * - `emit(event, ...args)` calls the listeners registered for `event`.
 *   Exceptions thrown by a listener are swallowed so one faulty consumer
 *   cannot break the engine or the remaining listeners.
 * - `clear()` removes every listener of every event.
 *
 * `emit` works on a snapshot of the listener set: a listener subscribed
 * while an emit is in progress is first called on the NEXT emit, and a
 * listener removed during the emit is skipped if it has not run yet.
 */
export function createEngineBus(): {
  on: <K extends keyof EngineEventMap>(event: K, cb: EngineEventMap[K]) => Unsub;
  emit: <K extends keyof EngineEventMap>(
    event: K,
    ...args: Parameters<EngineEventMap[K]>
  ) => void;
  clear: () => void;
} {
  const listeners: Listeners = {
    partial: new Set(),
    final: new Set(),
    error: new Set(),
    state: new Set(),
  };

  return {
    on(event, cb) {
      const set = listeners[event] as Set<typeof cb>;
      set.add(cb);
      return () => {
        set.delete(cb);
      };
    },
    emit(event, ...args) {
      const set = listeners[event] as Set<unknown>;
      // Iterating the live Set would also visit listeners added during
      // this emit; a listener that re-subscribes on every call would
      // then never let the loop finish.
      for (const cb of Array.from(set)) {
        // Honour unsubscriptions performed by an earlier listener.
        if (!set.has(cb)) continue;
        try {
          (cb as (...a: unknown[]) => void)(...(args as unknown[]));
        } catch {
          // listener errors are isolated — never break the engine loop
        }
      }
    },
    clear() {
      for (const key of Object.keys(listeners) as Array<keyof Listeners>) {
        listeners[key].clear();
      }
    },
  };
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared mic capture used by the HTTP and WebSocket engines.
|
|
3
|
+
*
|
|
4
|
+
* Probes the browser for a working `MediaRecorder` MIME type and emits
|
|
5
|
+
* `Blob` chunks on a steady interval. Picks the first supported MIME in
|
|
6
|
+
* order: `audio/webm;codecs=opus` → `audio/ogg;codecs=opus` →
|
|
7
|
+
 * `audio/mp4;codecs=mp4a` → `audio/webm`. Falls back to the recorder's default MIME if none match.
|
|
8
|
+
*
|
|
9
|
+
* The capture also exposes the raw `MediaStream` so callers can wire up
|
|
10
|
+
* an `AnalyserNode` for the level meter without owning a second copy.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { sttLogger } from '../logger';
|
|
14
|
+
import type { RecognitionError } from '../../types';
|
|
15
|
+
|
|
16
|
+
/** MIME types probed, in priority order, when looking for a usable encoder. */
const PREFERRED_MIMES = [
  'audio/webm;codecs=opus',
  'audio/ogg;codecs=opus',
  'audio/mp4;codecs=mp4a',
  'audio/webm',
];

/**
 * Returns the first MIME type that `MediaRecorder.isTypeSupported`
 * accepts. A caller-supplied `preferred` type is probed first, followed
 * by the built-in list.
 *
 * @param preferred Optional MIME type to try before the defaults.
 * @returns The supported MIME type, or `undefined` when `MediaRecorder`
 *   does not exist or none of the candidates is supported.
 */
export function pickMime(preferred?: string): string | undefined {
  if (typeof MediaRecorder === 'undefined') return undefined;
  const probeOrder = preferred ? [preferred, ...PREFERRED_MIMES] : PREFERRED_MIMES;
  return probeOrder.find((candidate) => MediaRecorder.isTypeSupported(candidate));
}
|
|
31
|
+
|
|
32
|
+
/** Settings accepted by `startMicCapture`. */
export interface MicCaptureOptions {
  /** Exact input device to open; any available microphone is used when omitted. */
  deviceId?: string;
  /**
   * MIME type to try before the built-in candidates — useful when the
   * backend expects a specific codec.
   */
  mime?: string;
  /** How often a chunk is emitted, in ms. Defaults to 250. */
  chunkMs?: number;
  /** Receives every non-empty recorded chunk. */
  onChunk: (chunk: Blob) => void;
  /** Receives recorder failures that happen after capture has started. */
  onError?: (err: RecognitionError) => void;
}
|
|
41
|
+
|
|
42
|
+
/** Live capture session returned by `startMicCapture`. */
export interface MicCaptureHandle {
  /** The microphone stream feeding the recorder. */
  readonly stream: MediaStream;
  /** MIME type the recorder is using, when known. */
  readonly mime: string | undefined;
  /** Stops the recorder and the stream's tracks. */
  stop(): Promise<void>;
}
|
|
47
|
+
|
|
48
|
+
/** Assembles a `RecognitionError` object from its three parts. */
function toErr(code: RecognitionError['code'], message: string, cause?: unknown): RecognitionError {
  const err: RecognitionError = { code, message, cause };
  return err;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Opens the microphone and starts a `MediaRecorder` that delivers
 * non-empty `Blob` chunks to `opts.onChunk` every `opts.chunkMs`
 * milliseconds (250 by default).
 *
 * @param opts Device, MIME preference, chunk interval and callbacks.
 * @returns A handle exposing the stream, the chosen MIME type and an
 *   idempotent `stop()`.
 * @throws A `RecognitionError`-shaped object with code
 *   - `unsupported` when `getUserMedia` / `MediaRecorder` is missing,
 *   - `permission-denied` / `no-microphone` / `unknown` when the stream
 *     cannot be opened,
 *   - `engine` when the recorder cannot be created or started.
 */
export async function startMicCapture(
  opts: MicCaptureOptions,
): Promise<MicCaptureHandle> {
  if (typeof navigator === 'undefined' || !navigator.mediaDevices?.getUserMedia) {
    throw toErr('unsupported', 'getUserMedia is not available in this environment.');
  }
  if (typeof MediaRecorder === 'undefined') {
    throw toErr('unsupported', 'MediaRecorder is not available in this environment.');
  }

  let stream: MediaStream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({
      audio: opts.deviceId ? { deviceId: { exact: opts.deviceId } } : true,
      video: false,
    });
  } catch (cause) {
    const name = (cause as { name?: string })?.name;
    if (name === 'NotAllowedError' || name === 'SecurityError') {
      throw toErr('permission-denied', 'Microphone permission denied.', cause);
    }
    if (name === 'NotFoundError' || name === 'OverconstrainedError') {
      throw toErr('no-microphone', 'No microphone found matching the constraints.', cause);
    }
    throw toErr('unknown', 'Failed to access microphone.', cause);
  }

  const releaseTracks = (): void => {
    stream.getTracks().forEach((t) => t.stop());
  };

  const mime = pickMime(opts.mime);
  let rec: MediaRecorder;
  try {
    rec = mime ? new MediaRecorder(stream, { mimeType: mime }) : new MediaRecorder(stream);

    rec.ondataavailable = (e) => {
      if (e.data && e.data.size > 0) opts.onChunk(e.data);
    };
    rec.onerror = (e) => {
      const err = toErr('engine', 'MediaRecorder error.', e);
      sttLogger.warn('[capture] recorder error', e);
      opts.onError?.(err);
    };

    rec.start(opts.chunkMs ?? 250);
  } catch (cause) {
    // The stream is already open at this point; without this the
    // microphone would stay claimed although no handle is returned.
    releaseTracks();
    throw toErr('engine', 'Failed to start MediaRecorder.', cause);
  }

  // Shared by all stop() callers so repeated calls resolve together.
  let stopped: Promise<void> | null = null;

  return {
    stream,
    mime: mime ?? rec.mimeType,
    async stop() {
      if (!stopped) {
        stopped = new Promise<void>((resolve) => {
          if (rec.state === 'inactive') {
            // The recorder already ended on its own, so no further
            // `stop` event will arrive — waiting for one would hang.
            resolve();
            return;
          }
          rec.addEventListener('stop', () => resolve(), { once: true });
          rec.stop();
        });
        releaseTracks();
      }
      await stopped;
    },
  };
}
|