@oh-my-pi/pi-coding-agent 15.12.4 → 15.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +304 -6
- package/dist/cli.js +1015 -881
- package/dist/types/async/job-manager.d.ts +15 -0
- package/dist/types/autolearn/controller.d.ts +25 -0
- package/dist/types/autolearn/managed-skills.d.ts +45 -0
- package/dist/types/autoresearch/state.d.ts +1 -1
- package/dist/types/autoresearch/types.d.ts +1 -1
- package/dist/types/cli/args.d.ts +19 -1
- package/dist/types/cli/session-picker.d.ts +1 -1
- package/dist/types/cli/setup-cli.d.ts +1 -1
- package/dist/types/cli/setup-model-picker.d.ts +14 -0
- package/dist/types/collab/protocol.d.ts +1 -1
- package/dist/types/commands/say.d.ts +24 -0
- package/dist/types/config/keybindings.d.ts +3 -3
- package/dist/types/config/model-registry.d.ts +10 -0
- package/dist/types/config/models-config-schema.d.ts +12 -0
- package/dist/types/config/models-config.d.ts +8 -2
- package/dist/types/config/settings-schema.d.ts +261 -58
- package/dist/types/export/html/index.d.ts +2 -1
- package/dist/types/extensibility/extensions/model-api.d.ts +17 -0
- package/dist/types/extensibility/extensions/runner.d.ts +3 -1
- package/dist/types/extensibility/extensions/types.d.ts +47 -1
- package/dist/types/extensibility/hooks/index.d.ts +2 -1
- package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +9 -0
- package/dist/types/extensibility/plugins/loader.d.ts +11 -0
- package/dist/types/extensibility/shared-events.d.ts +1 -1
- package/dist/types/extensibility/skills.d.ts +10 -0
- package/dist/types/goals/guided-setup.d.ts +18 -0
- package/dist/types/goals/state.d.ts +1 -1
- package/dist/types/hindsight/transcript.d.ts +1 -1
- package/dist/types/index.d.ts +5 -0
- package/dist/types/internal-urls/local-protocol.d.ts +4 -2
- package/dist/types/main.d.ts +4 -3
- package/dist/types/mcp/startup-events.d.ts +11 -0
- package/dist/types/memories/index.d.ts +7 -0
- package/dist/types/memory-backend/local-backend.d.ts +4 -3
- package/dist/types/mnemopi/config.d.ts +4 -4
- package/dist/types/modes/components/agent-hub.d.ts +6 -0
- package/dist/types/modes/components/assistant-message.d.ts +1 -2
- package/dist/types/modes/components/compaction-summary-message.d.ts +15 -1
- package/dist/types/modes/components/custom-editor.d.ts +39 -1
- package/dist/types/modes/components/custom-editor.test.d.ts +1 -0
- package/dist/types/modes/components/session-selector.d.ts +1 -1
- package/dist/types/modes/components/tool-execution.d.ts +26 -16
- package/dist/types/modes/components/transcript-container.d.ts +23 -2
- package/dist/types/modes/components/tree-selector.d.ts +1 -1
- package/dist/types/modes/components/usage-row.d.ts +3 -0
- package/dist/types/modes/controllers/command-controller.d.ts +2 -2
- package/dist/types/modes/controllers/input-controller.d.ts +14 -0
- package/dist/types/modes/controllers/selector-controller.d.ts +3 -1
- package/dist/types/modes/gradient-highlight.d.ts +9 -4
- package/dist/types/modes/image-references.d.ts +6 -0
- package/dist/types/modes/interactive-mode.d.ts +27 -3
- package/dist/types/modes/magic-keywords.d.ts +13 -1
- package/dist/types/modes/rpc/rpc-mode.d.ts +35 -1
- package/dist/types/modes/rpc/rpc-types.d.ts +9 -1
- package/dist/types/modes/runtime-init.d.ts +4 -0
- package/dist/types/modes/theme/theme.d.ts +13 -2
- package/dist/types/modes/types.d.ts +8 -2
- package/dist/types/modes/utils/ui-helpers.d.ts +1 -1
- package/dist/types/registry/agent-registry.d.ts +17 -0
- package/dist/types/secrets/obfuscator.d.ts +1 -1
- package/dist/types/session/agent-session.d.ts +14 -2
- package/dist/types/session/indexed-session-storage.d.ts +3 -4
- package/dist/types/session/session-context.d.ts +39 -0
- package/dist/types/session/session-entries.d.ts +159 -0
- package/dist/types/session/session-listing.d.ts +69 -0
- package/dist/types/session/session-loader.d.ts +16 -0
- package/dist/types/session/session-manager.d.ts +82 -474
- package/dist/types/session/session-migrations.d.ts +12 -0
- package/dist/types/session/session-paths.d.ts +25 -0
- package/dist/types/session/session-persistence.d.ts +8 -0
- package/dist/types/session/session-storage.d.ts +11 -12
- package/dist/types/session/snapcompact-inline.d.ts +12 -1
- package/dist/types/session/snapcompact-savings-journal.d.ts +46 -0
- package/dist/types/session/tool-choice-queue.d.ts +6 -6
- package/dist/types/stt/asr-client.d.ts +90 -0
- package/dist/types/stt/asr-protocol.d.ts +97 -0
- package/dist/types/stt/asr-worker.d.ts +2 -0
- package/dist/types/stt/downloader.d.ts +38 -0
- package/dist/types/stt/endpointer.d.ts +59 -0
- package/dist/types/stt/index.d.ts +5 -1
- package/dist/types/stt/models.d.ts +120 -0
- package/dist/types/stt/recorder.d.ts +17 -0
- package/dist/types/stt/stt-controller.d.ts +6 -0
- package/dist/types/stt/transcriber.d.ts +5 -7
- package/dist/types/stt/wav.d.ts +29 -0
- package/dist/types/system-prompt.d.ts +4 -0
- package/dist/types/task/executor.d.ts +2 -0
- package/dist/types/task/index.d.ts +9 -1
- package/dist/types/task/types.d.ts +36 -0
- package/dist/types/tools/bash.d.ts +2 -2
- package/dist/types/tools/eval-render.d.ts +1 -1
- package/dist/types/tools/index.d.ts +11 -1
- package/dist/types/tools/irc.d.ts +1 -0
- package/dist/types/tools/learn.d.ts +51 -0
- package/dist/types/tools/manage-skill.d.ts +40 -0
- package/dist/types/tools/plan-mode-guard.d.ts +10 -0
- package/dist/types/tools/renderers.d.ts +7 -11
- package/dist/types/tools/ssh.d.ts +1 -1
- package/dist/types/tools/todo.d.ts +1 -1
- package/dist/types/tools/tts.d.ts +25 -0
- package/dist/types/tools/write.d.ts +1 -1
- package/dist/types/tts/downloader.d.ts +20 -0
- package/dist/types/tts/index.d.ts +8 -0
- package/dist/types/tts/models.d.ts +82 -0
- package/dist/types/tts/player.d.ts +32 -0
- package/dist/types/tts/runtime.d.ts +6 -0
- package/dist/types/tts/streaming-player.d.ts +41 -0
- package/dist/types/tts/tts-client.d.ts +93 -0
- package/dist/types/tts/tts-protocol.d.ts +95 -0
- package/dist/types/tts/tts-worker.d.ts +2 -0
- package/dist/types/tts/vocalizer.d.ts +41 -0
- package/dist/types/tts/wav.d.ts +8 -0
- package/dist/types/utils/tool-choice.d.ts +8 -0
- package/dist/types/utils/tools-manager.d.ts +2 -1
- package/dist/types/utils/tools-manager.test.d.ts +1 -0
- package/dist/types/web/scrapers/github.d.ts +1 -1
- package/package.json +15 -14
- package/src/async/job-manager.ts +49 -0
- package/src/autolearn/controller.ts +139 -0
- package/src/autolearn/managed-skills.ts +257 -0
- package/src/autoresearch/state.ts +1 -1
- package/src/autoresearch/types.ts +1 -1
- package/src/cli/args.ts +56 -2
- package/src/cli/session-picker.ts +2 -1
- package/src/cli/setup-cli.ts +148 -47
- package/src/cli/setup-model-picker.ts +43 -0
- package/src/cli-commands.ts +1 -0
- package/src/cli.ts +45 -13
- package/src/collab/host.ts +1 -1
- package/src/collab/protocol.ts +1 -1
- package/src/commands/say.ts +102 -0
- package/src/commands/setup.ts +1 -1
- package/src/commit/agentic/tools/analyze-file.ts +3 -0
- package/src/config/keybindings.ts +2 -2
- package/src/config/model-discovery.ts +11 -5
- package/src/config/model-registry.ts +64 -9
- package/src/config/models-config-schema.ts +4 -1
- package/src/config/models-config.ts +2 -1
- package/src/config/settings-schema.ts +248 -32
- package/src/config/settings.ts +10 -0
- package/src/discovery/builtin.ts +23 -1
- package/src/discovery/claude-plugins.ts +44 -5
- package/src/discovery/helpers.ts +41 -1
- package/src/eval/__tests__/budget-bridge.test.ts +1 -1
- package/src/eval/js/shared/prelude.txt +69 -17
- package/src/export/html/index.ts +3 -6
- package/src/extensibility/extensions/model-api.ts +41 -0
- package/src/extensibility/extensions/runner.ts +4 -0
- package/src/extensibility/extensions/types.ts +52 -1
- package/src/extensibility/extensions/wrapper.ts +41 -5
- package/src/extensibility/hooks/index.ts +2 -1
- package/src/extensibility/plugins/legacy-pi-compat.ts +43 -13
- package/src/extensibility/plugins/loader.ts +30 -19
- package/src/extensibility/plugins/manager.ts +221 -90
- package/src/extensibility/shared-events.ts +1 -1
- package/src/extensibility/skills.ts +96 -15
- package/src/goals/guided-setup.ts +133 -0
- package/src/goals/state.ts +1 -1
- package/src/hindsight/transcript.ts +1 -1
- package/src/index.ts +5 -0
- package/src/internal-urls/docs-index.generated.ts +10 -10
- package/src/internal-urls/history-protocol.ts +1 -1
- package/src/internal-urls/local-protocol.ts +29 -7
- package/src/main.ts +27 -7
- package/src/mcp/startup-events.ts +21 -0
- package/src/mcp/transports/stdio.ts +2 -1
- package/src/memories/index.ts +146 -11
- package/src/memory-backend/local-backend.ts +11 -5
- package/src/mnemopi/backend.ts +1 -0
- package/src/mnemopi/config.ts +26 -10
- package/src/modes/acp/acp-agent.ts +3 -5
- package/src/modes/components/agent-hub.ts +49 -4
- package/src/modes/components/assistant-message.ts +4 -37
- package/src/modes/components/compaction-summary-message.ts +125 -26
- package/src/modes/components/custom-editor.test.ts +96 -0
- package/src/modes/components/custom-editor.ts +164 -8
- package/src/modes/components/session-selector.ts +1 -1
- package/src/modes/components/settings-defs.ts +7 -0
- package/src/modes/components/tool-execution.ts +82 -43
- package/src/modes/components/transcript-container.ts +70 -1
- package/src/modes/components/tree-selector.ts +1 -1
- package/src/modes/components/usage-row.ts +18 -0
- package/src/modes/components/user-message.ts +4 -2
- package/src/modes/controllers/command-controller.ts +14 -4
- package/src/modes/controllers/event-controller.ts +78 -11
- package/src/modes/controllers/extension-ui-controller.ts +6 -0
- package/src/modes/controllers/input-controller.ts +258 -27
- package/src/modes/controllers/selector-controller.ts +12 -2
- package/src/modes/gradient-highlight.ts +21 -9
- package/src/modes/image-references.ts +20 -0
- package/src/modes/interactive-mode.ts +286 -40
- package/src/modes/magic-keywords.ts +27 -5
- package/src/modes/rpc/rpc-mode.ts +146 -14
- package/src/modes/rpc/rpc-subagents.ts +2 -2
- package/src/modes/rpc/rpc-types.ts +8 -2
- package/src/modes/runtime-init.ts +28 -3
- package/src/modes/theme/theme.ts +98 -50
- package/src/modes/types.ts +6 -2
- package/src/modes/utils/hotkeys-markdown.ts +1 -1
- package/src/modes/utils/ui-helpers.ts +34 -6
- package/src/priority.json +5 -1
- package/src/prompts/agents/task.md +1 -0
- package/src/prompts/goals/guided-goal-interview.md +8 -0
- package/src/prompts/goals/guided-goal-system.md +12 -0
- package/src/prompts/memories/read-path.md +6 -0
- package/src/prompts/system/autolearn-guidance-learn.md +1 -0
- package/src/prompts/system/autolearn-guidance.md +7 -0
- package/src/prompts/system/autolearn-nudge.md +3 -0
- package/src/prompts/system/eager-task.md +7 -0
- package/src/prompts/system/eager-todo.md +11 -6
- package/src/prompts/system/subagent-system-prompt.md +4 -0
- package/src/prompts/system/system-prompt.md +10 -5
- package/src/prompts/system/title-marker-instruction.md +1 -0
- package/src/prompts/system/title-system-marker.md +16 -0
- package/src/prompts/tools/job.md +1 -0
- package/src/prompts/tools/learn.md +7 -0
- package/src/prompts/tools/manage-skill.md +9 -0
- package/src/prompts/tools/task.md +3 -0
- package/src/registry/agent-registry.ts +30 -0
- package/src/sdk.ts +88 -24
- package/src/secrets/obfuscator.ts +1 -1
- package/src/session/agent-session.ts +209 -87
- package/src/session/history-storage.ts +2 -2
- package/src/session/indexed-session-storage.ts +7 -17
- package/src/session/session-context.ts +352 -0
- package/src/session/session-entries.ts +194 -0
- package/src/session/session-listing.ts +588 -0
- package/src/session/session-loader.ts +106 -0
- package/src/session/session-manager.ts +933 -3145
- package/src/session/session-migrations.ts +78 -0
- package/src/session/session-paths.ts +193 -0
- package/src/session/session-persistence.ts +131 -0
- package/src/session/session-storage.ts +91 -50
- package/src/session/snapcompact-inline.ts +21 -1
- package/src/session/snapcompact-savings-journal.ts +113 -0
- package/src/session/tool-choice-queue.ts +23 -11
- package/src/slash-commands/builtin-registry.ts +25 -3
- package/src/stt/asr-client.ts +520 -0
- package/src/stt/asr-protocol.ts +65 -0
- package/src/stt/asr-worker.ts +790 -0
- package/src/stt/downloader.ts +107 -47
- package/src/stt/endpointer.ts +259 -0
- package/src/stt/index.ts +5 -1
- package/src/stt/models.ts +150 -0
- package/src/stt/recorder.ts +247 -60
- package/src/stt/stt-controller.ts +201 -22
- package/src/stt/transcriber.ts +37 -68
- package/src/stt/wav.ts +173 -0
- package/src/system-prompt.ts +8 -0
- package/src/task/agents.ts +1 -2
- package/src/task/executor.ts +49 -15
- package/src/task/index.ts +60 -6
- package/src/task/render.ts +83 -8
- package/src/task/types.ts +53 -0
- package/src/tools/ask.ts +8 -0
- package/src/tools/bash.ts +4 -3
- package/src/tools/eval-render.ts +4 -3
- package/src/tools/index.ts +40 -4
- package/src/tools/irc.ts +10 -2
- package/src/tools/job.ts +14 -2
- package/src/tools/learn.ts +144 -0
- package/src/tools/manage-skill.ts +104 -0
- package/src/tools/plan-mode-guard.ts +53 -19
- package/src/tools/renderers.ts +7 -11
- package/src/tools/ssh.ts +4 -3
- package/src/tools/todo.ts +1 -1
- package/src/tools/tts.ts +203 -92
- package/src/tools/write.ts +18 -2
- package/src/tts/downloader.ts +64 -0
- package/src/tts/index.ts +8 -0
- package/src/tts/models.ts +137 -0
- package/src/tts/player.ts +137 -0
- package/src/tts/runtime.ts +21 -0
- package/src/tts/streaming-player.ts +266 -0
- package/src/tts/tts-client.ts +647 -0
- package/src/tts/tts-protocol.ts +60 -0
- package/src/tts/tts-worker.ts +497 -0
- package/src/tts/vocalizer.ts +162 -0
- package/src/tts/wav.ts +58 -0
- package/src/utils/title-generator.ts +48 -5
- package/src/utils/tool-choice.ts +16 -0
- package/src/utils/tools-manager.test.ts +25 -0
- package/src/utils/tools-manager.ts +19 -1
- package/src/web/scrapers/github.ts +96 -0
- package/src/web/search/index.ts +13 -0
- package/src/web/search/providers/searxng.ts +13 -1
- package/dist/types/stt/setup.d.ts +0 -18
- package/src/stt/setup.ts +0 -52
- package/src/stt/transcribe.py +0 -70
package/src/stt/downloader.ts
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import {
|
|
1
|
+
import * as fs from "node:fs/promises";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { getTinyModelsCacheDir } from "@oh-my-pi/pi-utils";
|
|
4
|
+
import { sttClient } from "./asr-client";
|
|
5
|
+
import type { SttProgressStatus } from "./asr-protocol";
|
|
6
|
+
import { resolveSttModelSpec } from "./models";
|
|
7
|
+
import { ensureRecorder } from "./recorder";
|
|
4
8
|
|
|
5
9
|
export interface DownloadProgress {
|
|
6
10
|
stage: string;
|
|
@@ -9,63 +13,119 @@ export interface DownloadProgress {
|
|
|
9
13
|
|
|
10
14
|
export interface EnsureOptions {
|
|
11
15
|
modelName?: string;
|
|
16
|
+
signal?: AbortSignal;
|
|
12
17
|
onProgress?: (progress: DownloadProgress) => void;
|
|
13
18
|
}
|
|
14
19
|
|
|
15
|
-
// ──
|
|
20
|
+
// ── ONNX Whisper model ─────────────────────────────────────────────
|
|
16
21
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
22
|
+
/**
|
|
23
|
+
* Real-progress event for a speech-model download, surfaced to UI callers.
|
|
24
|
+
* `percent` is an integer 0–100 aggregated across all model files (encoder +
|
|
25
|
+
* decoder shards), so it advances monotonically toward completion.
|
|
26
|
+
*/
|
|
27
|
+
export interface SttDownloadProgress {
|
|
28
|
+
status: SttProgressStatus;
|
|
29
|
+
/** Integer 0–100 aggregated across files. */
|
|
30
|
+
percent: number;
|
|
31
|
+
/** Bytes downloaded so far across all files. */
|
|
32
|
+
loaded: number;
|
|
33
|
+
/** Total bytes across all files seen so far. */
|
|
34
|
+
total: number;
|
|
35
|
+
/** The file currently downloading, when known. */
|
|
36
|
+
file?: string;
|
|
37
|
+
repo: string;
|
|
38
|
+
label: string;
|
|
39
|
+
}
|
|
21
40
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
41
|
+
/**
|
|
42
|
+
* Whether the selected model is already present in the local cache. For
|
|
43
|
+
* transformers.js Whisper tiers a complete download leaves `config.json` plus
|
|
44
|
+
* the `onnx/` weight files (a bare `config.json` from an interrupted fetch reads
|
|
45
|
+
* as not-cached); for sherpa-onnx tiers every model file (encoder/decoder/joiner
|
|
46
|
+
* + tokens) must be present (`.part` sidecars from an interrupted fetch are
|
|
47
|
+
* ignored).
|
|
48
|
+
*/
|
|
49
|
+
export async function isSttModelCached(key: string): Promise<boolean> {
|
|
50
|
+
const spec = resolveSttModelSpec(key);
|
|
51
|
+
const repoDir = path.join(getTinyModelsCacheDir(), spec.repo);
|
|
52
|
+
if (spec.engine === "sherpa") {
|
|
53
|
+
try {
|
|
54
|
+
const root = new Set(await fs.readdir(repoDir));
|
|
55
|
+
for (const role in spec.files) {
|
|
56
|
+
if (!root.has(spec.files[role as keyof typeof spec.files])) return false;
|
|
57
|
+
}
|
|
58
|
+
return true;
|
|
59
|
+
} catch {
|
|
60
|
+
return false;
|
|
31
61
|
}
|
|
32
|
-
return;
|
|
33
62
|
}
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
async function ensurePythonWhisper(options?: EnsureOptions): Promise<void> {
|
|
43
|
-
const pythonCmd = resolvePython();
|
|
44
|
-
if (!pythonCmd) {
|
|
45
|
-
throw new Error("Python not found. Install Python 3.8+ from https://python.org");
|
|
63
|
+
try {
|
|
64
|
+
const root = await fs.readdir(repoDir);
|
|
65
|
+
if (!root.includes("config.json")) return false;
|
|
66
|
+
const onnxFiles = await fs.readdir(path.join(repoDir, "onnx")).catch(() => [] as string[]);
|
|
67
|
+
return onnxFiles.some(file => file.endsWith(".onnx"));
|
|
68
|
+
} catch {
|
|
69
|
+
return false;
|
|
46
70
|
}
|
|
71
|
+
}
|
|
47
72
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
73
|
+
/**
|
|
74
|
+
* Download (or warm from cache) the selected ONNX Whisper model via the speech
|
|
75
|
+
* worker, resolving once the model is fully present and loaded. Streams real
|
|
76
|
+
* Hub progress with an aggregated integer percent. Rejects if the worker cannot
|
|
77
|
+
* obtain the model. Safe to call non-interactively.
|
|
78
|
+
*/
|
|
79
|
+
export async function downloadSttModel(
|
|
80
|
+
key: string,
|
|
81
|
+
onProgress?: (progress: SttDownloadProgress) => void,
|
|
82
|
+
options?: { signal?: AbortSignal },
|
|
83
|
+
): Promise<void> {
|
|
84
|
+
const spec = resolveSttModelSpec(key);
|
|
85
|
+
const files = new Map<string, { loaded: number; total: number }>();
|
|
86
|
+
const ok = await sttClient.downloadModel(spec.key, {
|
|
87
|
+
signal: options?.signal,
|
|
88
|
+
onProgress: event => {
|
|
89
|
+
if ((event.status === "progress" || event.status === "progress_total") && event.file) {
|
|
90
|
+
if (typeof event.loaded === "number" && typeof event.total === "number" && event.total > 0) {
|
|
91
|
+
files.set(event.file, { loaded: event.loaded, total: event.total });
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
let loaded = 0;
|
|
95
|
+
let total = 0;
|
|
96
|
+
for (const file of files.values()) {
|
|
97
|
+
loaded += file.loaded;
|
|
98
|
+
total += file.total;
|
|
99
|
+
}
|
|
100
|
+
const settled = event.status === "ready" || event.status === "done";
|
|
101
|
+
const percent = total > 0 ? Math.min(100, Math.round((loaded / total) * 100)) : settled ? 100 : 0;
|
|
102
|
+
onProgress?.({
|
|
103
|
+
status: event.status,
|
|
104
|
+
percent,
|
|
105
|
+
loaded,
|
|
106
|
+
total,
|
|
107
|
+
file: event.file,
|
|
108
|
+
repo: spec.repo,
|
|
109
|
+
label: spec.label,
|
|
110
|
+
});
|
|
111
|
+
},
|
|
52
112
|
});
|
|
53
|
-
if (
|
|
54
|
-
|
|
55
|
-
options?.onProgress?.({ stage: "Installing openai-whisper (this may take a few minutes)..." });
|
|
56
|
-
logger.debug("Installing openai-whisper via pip");
|
|
57
|
-
|
|
58
|
-
const install = await $`${pythonCmd} -m pip install -q openai-whisper`.quiet().nothrow();
|
|
59
|
-
if (install.exitCode !== 0) {
|
|
60
|
-
const stderr = install.stderr.toString().trim();
|
|
61
|
-
throw new Error(`Failed to install openai-whisper: ${stderr.split("\n").pop()}`);
|
|
62
|
-
}
|
|
63
|
-
logger.debug("openai-whisper installed successfully");
|
|
113
|
+
if (!ok) throw new Error(`Failed to download speech model (${spec.repo}). Check your network connection.`);
|
|
64
114
|
}
|
|
65
115
|
|
|
66
116
|
// ── Public API ─────────────────────────────────────────────────────
|
|
67
117
|
|
|
68
118
|
export async function ensureSTTDependencies(options?: EnsureOptions): Promise<void> {
|
|
69
|
-
await
|
|
70
|
-
await
|
|
119
|
+
await ensureRecorder(progress => options?.onProgress?.(progress), options?.signal);
|
|
120
|
+
await downloadSttModel(
|
|
121
|
+
resolveSttModelSpec(options?.modelName).key,
|
|
122
|
+
progress => {
|
|
123
|
+
const stage =
|
|
124
|
+
progress.status === "ready" || progress.status === "done"
|
|
125
|
+
? `Speech model ${progress.label} ready`
|
|
126
|
+
: `Downloading speech model ${progress.label}`;
|
|
127
|
+
options?.onProgress?.({ stage, percent: progress.percent });
|
|
128
|
+
},
|
|
129
|
+
{ signal: options?.signal },
|
|
130
|
+
);
|
|
71
131
|
}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Energy-based speech endpointer for live transcription.
|
|
3
|
+
*
|
|
4
|
+
* The on-device ASR models we ship are non-streaming: the sherpa-onnx Parakeet
|
|
5
|
+
* recognizer and the transformers.js Whisper pipelines both decode a complete
|
|
6
|
+
* waveform in one shot. To transcribe *while the user is still speaking*, this
|
|
7
|
+
* splits the continuous 16 kHz mono float stream into speech segments at natural
|
|
8
|
+
* pauses — each segment is decoded and committed as it finalizes, and the
|
|
9
|
+
* in-progress segment is re-decoded periodically for a volatile live preview.
|
|
10
|
+
*
|
|
11
|
+
* Segmentation is pure short-time-energy VAD with an adaptive noise floor, so it
|
|
12
|
+
* needs no extra model and is engine-agnostic (it runs the same way whether the
|
|
13
|
+
* downstream model is sherpa or transformers). It is deliberately simple and
|
|
14
|
+
* fully deterministic so it can be unit-tested with synthetic signals.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/** Tunable thresholds for {@link StreamEndpointer}. All durations in ms. */
|
|
18
|
+
export interface EndpointerConfig {
|
|
19
|
+
/** Input sample rate (the recorder always delivers 16 kHz mono). */
|
|
20
|
+
sampleRate: number;
|
|
21
|
+
/** Short-time analysis frame size. */
|
|
22
|
+
frameMs: number;
|
|
23
|
+
/** Trailing silence inside a segment that finalizes (commits) it. */
|
|
24
|
+
endSilenceMs: number;
|
|
25
|
+
/** Shortest speech run that is committed; shorter runs are discarded as noise. */
|
|
26
|
+
minSpeechMs: number;
|
|
27
|
+
/** Hard cap on segment length so long pause-free speech still commits periodically. */
|
|
28
|
+
maxSegmentMs: number;
|
|
29
|
+
/** Audio retained before onset so the first phoneme of a segment is never clipped. */
|
|
30
|
+
preRollMs: number;
|
|
31
|
+
/** Cadence of volatile partial emissions for the in-progress segment. */
|
|
32
|
+
partialIntervalMs: number;
|
|
33
|
+
/** Speech threshold is `max(minThreshold, noiseFloor * energyRatio)`. */
|
|
34
|
+
energyRatio: number;
|
|
35
|
+
/** EMA weight tracking the ambient noise floor on non-speech frames. */
|
|
36
|
+
floorAttack: number;
|
|
37
|
+
/** Absolute RMS floor so a near-silent room never trips speech detection. */
|
|
38
|
+
minThreshold: number;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export const DEFAULT_ENDPOINTER_CONFIG: EndpointerConfig = {
|
|
42
|
+
sampleRate: 16_000,
|
|
43
|
+
frameMs: 30,
|
|
44
|
+
endSilenceMs: 600,
|
|
45
|
+
minSpeechMs: 200,
|
|
46
|
+
maxSegmentMs: 12_000,
|
|
47
|
+
preRollMs: 240,
|
|
48
|
+
partialIntervalMs: 450,
|
|
49
|
+
energyRatio: 2.5,
|
|
50
|
+
floorAttack: 0.05,
|
|
51
|
+
minThreshold: 0.008,
|
|
52
|
+
};
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Emitted by {@link StreamEndpointer.push} / {@link StreamEndpointer.flush}.
|
|
56
|
+
* `partial` is the volatile in-progress segment (decode and show as preview,
|
|
57
|
+
* never commit); `segment` is a finalized run (decode and commit once).
|
|
58
|
+
*/
|
|
59
|
+
export type EndpointerEvent = { kind: "partial"; audio: Float32Array } | { kind: "segment"; audio: Float32Array };
|
|
60
|
+
|
|
61
|
+
/**
 * Growable Float32 sample store. Appends are amortized O(1): the backing array
 * is only reallocated when it runs out of room, and then at least doubles.
 */
class FloatBuffer {
	/** Backing storage; only the first `#used` entries are meaningful. */
	#store = new Float32Array(0);
	/** Number of valid samples currently held. */
	#used = 0;

	/** Count of samples appended since the last reset. */
	get length(): number {
		return this.#used;
	}

	/** Append `samples` after the current contents, growing storage if required. */
	push(samples: Float32Array): void {
		const required = this.#used + samples.length;
		this.#reserve(required);
		this.#store.set(samples, this.#used);
		this.#used = required;
	}

	/** Copy `[0, end)` (clamped to the valid range) into a fresh array the caller can retain. */
	take(end = this.#used): Float32Array {
		const stop = Math.max(0, Math.min(end, this.#used));
		return this.#store.slice(0, stop);
	}

	/** Forget the contents while keeping the allocated capacity for reuse. */
	reset(): void {
		this.#used = 0;
	}

	/** Make sure the backing array can hold at least `required` samples. */
	#reserve(required: number): void {
		if (required <= this.#store.length) return;
		// Grow to the largest of: double the current size, the exact need, 16384 samples.
		const capacity = Math.max(this.#store.length * 2, required, 1 << 14);
		const grown = new Float32Array(capacity);
		grown.set(this.#store.subarray(0, this.#used));
		this.#store = grown;
	}
}
|
|
90
|
+
|
|
91
|
+
/**
 * Root-mean-square amplitude of one analysis frame.
 * An empty frame yields 0 because the divisor is clamped to at least 1.
 */
function rms(frame: Float32Array): number {
	let energy = 0;
	for (const sample of frame) {
		energy += sample * sample;
	}
	const count = Math.max(1, frame.length);
	return Math.sqrt(energy / count);
}
|
|
96
|
+
|
|
97
|
+
export class StreamEndpointer {
|
|
98
|
+
readonly #cfg: EndpointerConfig;
|
|
99
|
+
readonly #frameSamples: number;
|
|
100
|
+
readonly #preRollSamples: number;
|
|
101
|
+
|
|
102
|
+
#leftover = new Float32Array(0);
|
|
103
|
+
#inSpeech = false;
|
|
104
|
+
#noiseFloor: number;
|
|
105
|
+
#silenceMs = 0;
|
|
106
|
+
#segmentMs = 0;
|
|
107
|
+
#msSincePartial = 0;
|
|
108
|
+
#partialDirty = false;
|
|
109
|
+
|
|
110
|
+
readonly #segment = new FloatBuffer();
|
|
111
|
+
/** Ring of the most recent pre-onset frames, used as segment pre-roll. */
|
|
112
|
+
readonly #preRoll = new FloatBuffer();
|
|
113
|
+
|
|
114
|
+
constructor(config: Partial<EndpointerConfig> = {}) {
|
|
115
|
+
this.#cfg = { ...DEFAULT_ENDPOINTER_CONFIG, ...config };
|
|
116
|
+
this.#frameSamples = Math.max(1, Math.round((this.#cfg.sampleRate * this.#cfg.frameMs) / 1000));
|
|
117
|
+
this.#preRollSamples = Math.max(0, Math.round((this.#cfg.sampleRate * this.#cfg.preRollMs) / 1000));
|
|
118
|
+
this.#noiseFloor = this.#cfg.minThreshold;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/** Feed newly-captured samples; returns ordered partial/segment events. */
|
|
122
|
+
push(samples: Float32Array): EndpointerEvent[] {
|
|
123
|
+
const events: EndpointerEvent[] = [];
|
|
124
|
+
// Prepend the carried-over tail, then consume whole frames.
|
|
125
|
+
let buf: Float32Array;
|
|
126
|
+
if (this.#leftover.length === 0) {
|
|
127
|
+
buf = samples;
|
|
128
|
+
} else {
|
|
129
|
+
buf = new Float32Array(this.#leftover.length + samples.length);
|
|
130
|
+
buf.set(this.#leftover, 0);
|
|
131
|
+
buf.set(samples, this.#leftover.length);
|
|
132
|
+
}
|
|
133
|
+
let offset = 0;
|
|
134
|
+
for (; offset + this.#frameSamples <= buf.length; offset += this.#frameSamples) {
|
|
135
|
+
this.#processFrame(buf.subarray(offset, offset + this.#frameSamples), events);
|
|
136
|
+
}
|
|
137
|
+
this.#leftover = buf.slice(offset);
|
|
138
|
+
return events;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
/** End the stream; returns a trailing committed segment if one is pending. */
|
|
142
|
+
flush(): EndpointerEvent[] {
|
|
143
|
+
const events: EndpointerEvent[] = [];
|
|
144
|
+
if (this.#inSpeech && this.#leftover.length > 0) {
|
|
145
|
+
this.#segment.push(this.#leftover);
|
|
146
|
+
this.#segmentMs += (this.#leftover.length / this.#cfg.sampleRate) * 1000;
|
|
147
|
+
}
|
|
148
|
+
this.#leftover = new Float32Array(0);
|
|
149
|
+
if (this.#inSpeech) {
|
|
150
|
+
const speechMs = this.#segmentMs - this.#silenceMs;
|
|
151
|
+
if (speechMs >= this.#cfg.minSpeechMs) {
|
|
152
|
+
events.push({ kind: "segment", audio: this.#segment.take(this.#endpointKeep()) });
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
this.#reset();
|
|
156
|
+
return events;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
#processFrame(frame: Float32Array, events: EndpointerEvent[]): void {
|
|
160
|
+
const energy = rms(frame);
|
|
161
|
+
const threshold = Math.max(this.#cfg.minThreshold, this.#noiseFloor * this.#cfg.energyRatio);
|
|
162
|
+
const voiced = energy > threshold;
|
|
163
|
+
// Track ambient noise on non-speech frames only, so loud speech never
|
|
164
|
+
// inflates the floor (which would make the tail of an utterance read as
|
|
165
|
+
// silence and clip the segment short).
|
|
166
|
+
if (!voiced) {
|
|
167
|
+
this.#noiseFloor = this.#noiseFloor * (1 - this.#cfg.floorAttack) + energy * this.#cfg.floorAttack;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
if (!this.#inSpeech) {
|
|
171
|
+
this.#preRoll.push(frame);
|
|
172
|
+
// Keep only the most recent pre-roll window.
|
|
173
|
+
if (this.#preRoll.length > this.#preRollSamples) {
|
|
174
|
+
const tail = this.#preRoll.take().slice(this.#preRoll.length - this.#preRollSamples);
|
|
175
|
+
this.#preRoll.reset();
|
|
176
|
+
this.#preRoll.push(tail);
|
|
177
|
+
}
|
|
178
|
+
if (voiced) this.#beginSegment(frame);
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
this.#segment.push(frame);
|
|
183
|
+
this.#segmentMs += this.#cfg.frameMs;
|
|
184
|
+
this.#msSincePartial += this.#cfg.frameMs;
|
|
185
|
+
if (voiced) {
|
|
186
|
+
this.#silenceMs = 0;
|
|
187
|
+
this.#partialDirty = true;
|
|
188
|
+
} else {
|
|
189
|
+
this.#silenceMs += this.#cfg.frameMs;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
if (this.#silenceMs >= this.#cfg.endSilenceMs) {
|
|
193
|
+
this.#finalizeSegment(events);
|
|
194
|
+
return;
|
|
195
|
+
}
|
|
196
|
+
if (this.#segmentMs >= this.#cfg.maxSegmentMs) {
|
|
197
|
+
// Pause-free long speech: commit what we have and continue a fresh
|
|
198
|
+
// segment so output keeps flowing.
|
|
199
|
+
events.push({ kind: "segment", audio: this.#segment.take() });
|
|
200
|
+
this.#segment.reset();
|
|
201
|
+
this.#segmentMs = 0;
|
|
202
|
+
this.#silenceMs = 0;
|
|
203
|
+
this.#msSincePartial = 0;
|
|
204
|
+
this.#partialDirty = false;
|
|
205
|
+
return;
|
|
206
|
+
}
|
|
207
|
+
if (this.#partialDirty && this.#msSincePartial >= this.#cfg.partialIntervalMs) {
|
|
208
|
+
events.push({ kind: "partial", audio: this.#segment.take() });
|
|
209
|
+
this.#msSincePartial = 0;
|
|
210
|
+
this.#partialDirty = false;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
#beginSegment(onsetFrame: Float32Array): void {
|
|
215
|
+
this.#inSpeech = true;
|
|
216
|
+
this.#segment.reset();
|
|
217
|
+
const preRoll = this.#preRoll.take();
|
|
218
|
+
if (preRoll.length > 0) this.#segment.push(preRoll);
|
|
219
|
+
this.#segment.push(onsetFrame);
|
|
220
|
+
this.#preRoll.reset();
|
|
221
|
+
this.#silenceMs = 0;
|
|
222
|
+
this.#segmentMs = (this.#segment.length / this.#cfg.sampleRate) * 1000;
|
|
223
|
+
this.#msSincePartial = 0;
|
|
224
|
+
this.#partialDirty = true;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/**
 * Close the current segment and return to the idle (not-in-speech) state.
 *
 * A `segment` event is appended to `events` only when the voiced portion
 * (segment duration minus accumulated trailing silence) reaches
 * `minSpeechMs`; shorter bursts are discarded. All per-segment state is
 * cleared either way.
 *
 * @param events Output list that receives the committed segment, if any.
 */
#finalizeSegment(events: EndpointerEvent[]): void {
	const voicedMs = this.#segmentMs - this.#silenceMs;
	const longEnough = voicedMs >= this.#cfg.minSpeechMs;
	if (longEnough) {
		// Must run before the resets below: the keep count reads the silence
		// counter and the segment length.
		const keepSamples = this.#endpointKeep();
		const audio = this.#segment.take(keepSamples);
		events.push({ kind: "segment", audio });
	}

	this.#segment.reset();
	this.#inSpeech = false;
	this.#partialDirty = false;
	this.#segmentMs = 0;
	this.#silenceMs = 0;
	this.#msSincePartial = 0;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Number of samples to retain when a segment is committed on silence.
 *
 * Up to 120 ms of the trailing silence is kept so the last word is not
 * clipped; any silence beyond that is converted to a sample count and trimmed
 * from the end of the segment.
 *
 * @returns The sample count to keep (floored at 0 by the final `Math.max`).
 */
#endpointKeep(): number {
	const silenceMs = this.#silenceMs;
	const keptTailMs = Math.min(silenceMs, 120);
	const trimmedMs = Math.max(0, silenceMs - keptTailMs);
	const trimmedSamples = Math.round((this.#cfg.sampleRate * trimmedMs) / 1000);
	const remaining = this.#segment.length - trimmedSamples;
	return Math.max(0, remaining);
}
|
|
248
|
+
|
|
249
|
+
/**
 * Restore the endpointer to its initial idle state: both audio buffers are
 * emptied, every timer/flag is cleared, and the noise floor falls back to the
 * configured `minThreshold`.
 */
#reset(): void {
	// Audio buffers.
	this.#segment.reset();
	this.#preRoll.reset();

	// Flags.
	this.#inSpeech = false;
	this.#partialDirty = false;

	// Timers.
	this.#segmentMs = 0;
	this.#silenceMs = 0;
	this.#msSincePartial = 0;

	// Adaptive threshold state.
	this.#noiseFloor = this.#cfg.minThreshold;
}
|
|
259
|
+
}
|
package/src/stt/index.ts
CHANGED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import type { TinyModelDtype } from "../tiny/dtype";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* On-device speech-to-text model registry. Each tier maps a stable settings key
|
|
5
|
+
* onto a locally-runnable ASR model and the engine that loads it:
|
|
6
|
+
*
|
|
7
|
+
* - `transformers` — a transformers.js / ONNX Whisper repo, loaded by the
|
|
8
|
+
* `@huggingface/transformers` `automatic-speech-recognition` pipeline.
|
|
9
|
+
* - `sherpa` — a sherpa-onnx (Next-gen Kaldi) offline model, loaded by the
|
|
10
|
+
* native `sherpa-onnx-node` addon. Used for NVIDIA Parakeet, the Open ASR
|
|
11
|
+
* Leaderboard accuracy/speed leader.
|
|
12
|
+
*
|
|
13
|
+
* The worker resolves the spec by key and loads the model lazily (kept warm
|
|
14
|
+
* afterwards). Both engines run inside the hard-killed subprocess worker.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/** Runtime backend that loads the model for a given tier. */
export type SttEngine =
	| "transformers"
	| "sherpa";
|
|
19
|
+
|
|
20
|
+
/** Fields shared by every speech-model tier, regardless of engine. */
interface SttModelBase {
	/** Stable identifier: stored in the `stt.modelName` setting and sent over the worker protocol. */
	key: string;
	/** Runtime that loads this tier's model. */
	engine: SttEngine;
	/** Hugging Face repo id (a transformers.js ONNX repo, or a sherpa-onnx model repo). */
	repo: string;
	/** `true` for English-only checkpoints, which reject a configured source `language`. */
	englishOnly: boolean;
	/** Short display name; surfaced as the option label in `STT_MODEL_OPTIONS`. */
	label: string;
	/** Longer explanatory text; surfaced as the option description in `STT_MODEL_OPTIONS`. */
	description: string;
	/** Rough on-disk download size of the shipped weights (UI hint only). */
	sizeHint: string;
}
|
|
33
|
+
|
|
34
|
+
/** Tier backed by a Whisper-family checkpoint, run through the transformers.js ASR pipeline. */
export interface TransformersSttModelSpec extends SttModelBase {
	engine: "transformers";
	/**
	 * Default ONNX precision for this tier. `PI_TINY_DTYPE` or
	 * `providers.tinyModelDtype` take precedence when set.
	 */
	dtype: TinyModelDtype;
}
|
|
40
|
+
|
|
41
|
+
/** Tier backed by a sherpa-onnx offline model (e.g. a NeMo Parakeet transducer), loaded natively. */
export interface SherpaSttModelSpec extends SttModelBase {
	engine: "sherpa";
	/** Name of the sherpa-onnx offline model family, e.g. `nemo_transducer`. */
	modelType: string;
	/** Paths of the model files, relative to the repo root, that get fetched into the local cache. */
	files: Record<"encoder" | "decoder" | "joiner" | "tokens", string>;
}
|
|
49
|
+
|
|
50
|
+
/** Any speech-model tier; discriminate on `engine` to reach engine-specific fields. */
export type SttModelSpec =
	| TransformersSttModelSpec
	| SherpaSttModelSpec;
|
|
51
|
+
|
|
52
|
+
/**
 * Registry of speech-model tiers, listed from lightest to SoTA. The tier used
 * when nothing is configured is {@link DEFAULT_STT_MODEL_KEY}.
 *
 * - `fast` / `balanced` / `turbo`: multilingual Whisper checkpoints run on
 *   transformers.js.
 * - `parakeet`: NVIDIA Parakeet TDT 0.6B v3 run on sherpa-onnx, the Open ASR
 *   Leaderboard leader (lower WER and far higher throughput than Whisper).
 */
export const STT_MODELS = [
	// Whisper base (transformers.js)
	{
		key: "fast",
		engine: "transformers",
		repo: "onnx-community/whisper-base",
		dtype: "q8",
		englishOnly: false,
		label: "Fast (Whisper base)",
		description: "Whisper base, multilingual. Smallest + fastest; lowest accuracy. Best for low-resource machines.",
		sizeHint: "~60 MB",
	},
	// Whisper small (transformers.js)
	{
		key: "balanced",
		engine: "transformers",
		repo: "onnx-community/whisper-small",
		dtype: "q8",
		englishOnly: false,
		label: "Balanced (Whisper small)",
		description: "Whisper small, multilingual. More accurate than Fast, still light on CPU/RAM.",
		sizeHint: "~190 MB",
	},
	// Whisper large-v3-turbo (transformers.js)
	{
		key: "turbo",
		engine: "transformers",
		repo: "onnx-community/whisper-large-v3-turbo",
		dtype: "q4",
		englishOnly: false,
		label: "Turbo (Whisper large-v3)",
		description: "Whisper large-v3-turbo, 99 languages. Widest language coverage; large download, slower.",
		sizeHint: "~600 MB",
	},
	// NVIDIA Parakeet TDT 0.6B v3 (sherpa-onnx)
	{
		key: "parakeet",
		engine: "sherpa",
		repo: "csukuangfj/sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8",
		modelType: "nemo_transducer",
		files: {
			encoder: "encoder.int8.onnx",
			decoder: "decoder.int8.onnx",
			joiner: "joiner.int8.onnx",
			tokens: "tokens.txt",
		},
		englishOnly: false,
		label: "Parakeet TDT v3 (SoTA)",
		description:
			"NVIDIA Parakeet TDT 0.6B v3, 25 languages. Open ASR Leaderboard leader — best accuracy and far fastest decoding. Default.",
		sizeHint: "~680 MB",
	},
] as const satisfies readonly SttModelSpec[];
|
|
107
|
+
|
|
108
|
+
/**
 * SoTA default — NVIDIA Parakeet TDT 0.6B v3 (sherpa-onnx). Tops the Open ASR
 * Leaderboard on accuracy while decoding ~20× faster than Whisper large-v3.
 *
 * `satisfies SttModelKey` ties this literal to the registry at compile time:
 * renaming or removing the tier in `STT_MODELS` becomes a type error here
 * instead of a default that silently resolves to nothing at runtime. The
 * constant's own type stays the literal `"parakeet"`.
 */
export const DEFAULT_STT_MODEL_KEY = "parakeet" satisfies SttModelKey;
|
|
113
|
+
|
|
114
|
+
/** A concrete entry from {@link STT_MODELS}; its `key` is the literal tier union. */
export type SttModel = (typeof STT_MODELS)[number];

/** Union of the literal `key` values declared in {@link STT_MODELS}. */
export type SttModelKey = SttModel["key"];
|
|
118
|
+
|
|
119
|
+
/** Tier keys as a literal tuple, kept in step with the registry by the check below. */
export const STT_MODEL_VALUES = ["fast", "balanced", "turbo", "parakeet"] as const satisfies readonly SttModelKey[];

// Compile-time parity check between the tuple above and the registry keys.
// `Missing` = registry keys absent from the tuple; `Extra` = tuple entries that
// are not registry keys. If either is non-empty the annotated type collapses to
// `never`, so assigning `true` stops compiling.
type MissingSttModelValue = Exclude<SttModelKey, (typeof STT_MODEL_VALUES)[number]>;
type ExtraSttModelValue = Exclude<(typeof STT_MODEL_VALUES)[number], SttModelKey>;
const STT_MODEL_VALUES_MATCH_REGISTRY: [MissingSttModelValue | ExtraSttModelValue] extends [never] ? true : never =
	true;
// Reference the constant so it is not reported as unused.
void STT_MODEL_VALUES_MATCH_REGISTRY;
|
|
129
|
+
|
|
130
|
+
/** `{ value, label, description }` triples derived from {@link STT_MODELS}, in registry order. */
export const STT_MODEL_OPTIONS = STT_MODELS.map(model => ({
	value: model.key,
	label: model.label,
	description: model.description,
})) satisfies ReadonlyArray<{ value: SttModelKey; label: string; description: string }>;
|
|
135
|
+
|
|
136
|
+
/**
 * Type guard: reports whether `value` is the key of a registered tier.
 *
 * @param value Arbitrary string, e.g. a persisted setting.
 * @returns `true` when some entry of `STT_MODELS` has exactly this key.
 */
export function isSttModelKey(value: string): value is SttModelKey {
	for (const candidate of STT_MODELS) {
		if (candidate.key === value) return true;
	}
	return false;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Look up a tier by key.
 *
 * @param key Tier key to search for.
 * @returns The first `STT_MODELS` entry whose key matches, or `undefined` when none does.
 */
export function getSttModelSpec(key: string): SttModel | undefined {
	for (const candidate of STT_MODELS) {
		if (candidate.key === key) return candidate;
	}
	return undefined;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Resolve a (possibly stale or legacy) `stt.modelName` value onto a concrete
 * spec, falling back to the SoTA default when the key is unknown.
 *
 * @param key Configured tier key, or `undefined` when nothing is configured.
 * @returns The matching spec, or the spec for `DEFAULT_STT_MODEL_KEY`.
 * @throws Error if the default key itself is not present in the registry. This
 *   indicates a broken registry, and failing loudly is preferable to returning
 *   `undefined` typed as a spec.
 */
export function resolveSttModelSpec(key: string | undefined): SttModel {
	const requested = key === undefined ? undefined : getSttModelSpec(key);
	if (requested !== undefined) return requested;

	// Only consult the default when the requested key did not resolve.
	const fallback = getSttModelSpec(DEFAULT_STT_MODEL_KEY);
	if (fallback === undefined) {
		throw new Error(`Default STT model "${DEFAULT_STT_MODEL_KEY}" is not registered in STT_MODELS`);
	}
	return fallback;
}
|