@oh-my-pi/pi-coding-agent 15.12.4 → 15.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +304 -6
- package/dist/cli.js +1015 -881
- package/dist/types/async/job-manager.d.ts +15 -0
- package/dist/types/autolearn/controller.d.ts +25 -0
- package/dist/types/autolearn/managed-skills.d.ts +45 -0
- package/dist/types/autoresearch/state.d.ts +1 -1
- package/dist/types/autoresearch/types.d.ts +1 -1
- package/dist/types/cli/args.d.ts +19 -1
- package/dist/types/cli/session-picker.d.ts +1 -1
- package/dist/types/cli/setup-cli.d.ts +1 -1
- package/dist/types/cli/setup-model-picker.d.ts +14 -0
- package/dist/types/collab/protocol.d.ts +1 -1
- package/dist/types/commands/say.d.ts +24 -0
- package/dist/types/config/keybindings.d.ts +3 -3
- package/dist/types/config/model-registry.d.ts +10 -0
- package/dist/types/config/models-config-schema.d.ts +12 -0
- package/dist/types/config/models-config.d.ts +8 -2
- package/dist/types/config/settings-schema.d.ts +261 -58
- package/dist/types/export/html/index.d.ts +2 -1
- package/dist/types/extensibility/extensions/model-api.d.ts +17 -0
- package/dist/types/extensibility/extensions/runner.d.ts +3 -1
- package/dist/types/extensibility/extensions/types.d.ts +47 -1
- package/dist/types/extensibility/hooks/index.d.ts +2 -1
- package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +9 -0
- package/dist/types/extensibility/plugins/loader.d.ts +11 -0
- package/dist/types/extensibility/shared-events.d.ts +1 -1
- package/dist/types/extensibility/skills.d.ts +10 -0
- package/dist/types/goals/guided-setup.d.ts +18 -0
- package/dist/types/goals/state.d.ts +1 -1
- package/dist/types/hindsight/transcript.d.ts +1 -1
- package/dist/types/index.d.ts +5 -0
- package/dist/types/internal-urls/local-protocol.d.ts +4 -2
- package/dist/types/main.d.ts +4 -3
- package/dist/types/mcp/startup-events.d.ts +11 -0
- package/dist/types/memories/index.d.ts +7 -0
- package/dist/types/memory-backend/local-backend.d.ts +4 -3
- package/dist/types/mnemopi/config.d.ts +4 -4
- package/dist/types/modes/components/agent-hub.d.ts +6 -0
- package/dist/types/modes/components/assistant-message.d.ts +1 -2
- package/dist/types/modes/components/compaction-summary-message.d.ts +15 -1
- package/dist/types/modes/components/custom-editor.d.ts +39 -1
- package/dist/types/modes/components/custom-editor.test.d.ts +1 -0
- package/dist/types/modes/components/session-selector.d.ts +1 -1
- package/dist/types/modes/components/tool-execution.d.ts +26 -16
- package/dist/types/modes/components/transcript-container.d.ts +23 -2
- package/dist/types/modes/components/tree-selector.d.ts +1 -1
- package/dist/types/modes/components/usage-row.d.ts +3 -0
- package/dist/types/modes/controllers/command-controller.d.ts +2 -2
- package/dist/types/modes/controllers/input-controller.d.ts +14 -0
- package/dist/types/modes/controllers/selector-controller.d.ts +3 -1
- package/dist/types/modes/gradient-highlight.d.ts +9 -4
- package/dist/types/modes/image-references.d.ts +6 -0
- package/dist/types/modes/interactive-mode.d.ts +27 -3
- package/dist/types/modes/magic-keywords.d.ts +13 -1
- package/dist/types/modes/rpc/rpc-mode.d.ts +35 -1
- package/dist/types/modes/rpc/rpc-types.d.ts +9 -1
- package/dist/types/modes/runtime-init.d.ts +4 -0
- package/dist/types/modes/theme/theme.d.ts +13 -2
- package/dist/types/modes/types.d.ts +8 -2
- package/dist/types/modes/utils/ui-helpers.d.ts +1 -1
- package/dist/types/registry/agent-registry.d.ts +17 -0
- package/dist/types/secrets/obfuscator.d.ts +1 -1
- package/dist/types/session/agent-session.d.ts +14 -2
- package/dist/types/session/indexed-session-storage.d.ts +3 -4
- package/dist/types/session/session-context.d.ts +39 -0
- package/dist/types/session/session-entries.d.ts +159 -0
- package/dist/types/session/session-listing.d.ts +69 -0
- package/dist/types/session/session-loader.d.ts +16 -0
- package/dist/types/session/session-manager.d.ts +82 -474
- package/dist/types/session/session-migrations.d.ts +12 -0
- package/dist/types/session/session-paths.d.ts +25 -0
- package/dist/types/session/session-persistence.d.ts +8 -0
- package/dist/types/session/session-storage.d.ts +11 -12
- package/dist/types/session/snapcompact-inline.d.ts +12 -1
- package/dist/types/session/snapcompact-savings-journal.d.ts +46 -0
- package/dist/types/session/tool-choice-queue.d.ts +6 -6
- package/dist/types/stt/asr-client.d.ts +90 -0
- package/dist/types/stt/asr-protocol.d.ts +97 -0
- package/dist/types/stt/asr-worker.d.ts +2 -0
- package/dist/types/stt/downloader.d.ts +38 -0
- package/dist/types/stt/endpointer.d.ts +59 -0
- package/dist/types/stt/index.d.ts +5 -1
- package/dist/types/stt/models.d.ts +120 -0
- package/dist/types/stt/recorder.d.ts +17 -0
- package/dist/types/stt/stt-controller.d.ts +6 -0
- package/dist/types/stt/transcriber.d.ts +5 -7
- package/dist/types/stt/wav.d.ts +29 -0
- package/dist/types/system-prompt.d.ts +4 -0
- package/dist/types/task/executor.d.ts +2 -0
- package/dist/types/task/index.d.ts +9 -1
- package/dist/types/task/types.d.ts +36 -0
- package/dist/types/tools/bash.d.ts +2 -2
- package/dist/types/tools/eval-render.d.ts +1 -1
- package/dist/types/tools/index.d.ts +11 -1
- package/dist/types/tools/irc.d.ts +1 -0
- package/dist/types/tools/learn.d.ts +51 -0
- package/dist/types/tools/manage-skill.d.ts +40 -0
- package/dist/types/tools/plan-mode-guard.d.ts +10 -0
- package/dist/types/tools/renderers.d.ts +7 -11
- package/dist/types/tools/ssh.d.ts +1 -1
- package/dist/types/tools/todo.d.ts +1 -1
- package/dist/types/tools/tts.d.ts +25 -0
- package/dist/types/tools/write.d.ts +1 -1
- package/dist/types/tts/downloader.d.ts +20 -0
- package/dist/types/tts/index.d.ts +8 -0
- package/dist/types/tts/models.d.ts +82 -0
- package/dist/types/tts/player.d.ts +32 -0
- package/dist/types/tts/runtime.d.ts +6 -0
- package/dist/types/tts/streaming-player.d.ts +41 -0
- package/dist/types/tts/tts-client.d.ts +93 -0
- package/dist/types/tts/tts-protocol.d.ts +95 -0
- package/dist/types/tts/tts-worker.d.ts +2 -0
- package/dist/types/tts/vocalizer.d.ts +41 -0
- package/dist/types/tts/wav.d.ts +8 -0
- package/dist/types/utils/tool-choice.d.ts +8 -0
- package/dist/types/utils/tools-manager.d.ts +2 -1
- package/dist/types/utils/tools-manager.test.d.ts +1 -0
- package/dist/types/web/scrapers/github.d.ts +1 -1
- package/package.json +15 -14
- package/src/async/job-manager.ts +49 -0
- package/src/autolearn/controller.ts +139 -0
- package/src/autolearn/managed-skills.ts +257 -0
- package/src/autoresearch/state.ts +1 -1
- package/src/autoresearch/types.ts +1 -1
- package/src/cli/args.ts +56 -2
- package/src/cli/session-picker.ts +2 -1
- package/src/cli/setup-cli.ts +148 -47
- package/src/cli/setup-model-picker.ts +43 -0
- package/src/cli-commands.ts +1 -0
- package/src/cli.ts +45 -13
- package/src/collab/host.ts +1 -1
- package/src/collab/protocol.ts +1 -1
- package/src/commands/say.ts +102 -0
- package/src/commands/setup.ts +1 -1
- package/src/commit/agentic/tools/analyze-file.ts +3 -0
- package/src/config/keybindings.ts +2 -2
- package/src/config/model-discovery.ts +11 -5
- package/src/config/model-registry.ts +64 -9
- package/src/config/models-config-schema.ts +4 -1
- package/src/config/models-config.ts +2 -1
- package/src/config/settings-schema.ts +248 -32
- package/src/config/settings.ts +10 -0
- package/src/discovery/builtin.ts +23 -1
- package/src/discovery/claude-plugins.ts +44 -5
- package/src/discovery/helpers.ts +41 -1
- package/src/eval/__tests__/budget-bridge.test.ts +1 -1
- package/src/eval/js/shared/prelude.txt +69 -17
- package/src/export/html/index.ts +3 -6
- package/src/extensibility/extensions/model-api.ts +41 -0
- package/src/extensibility/extensions/runner.ts +4 -0
- package/src/extensibility/extensions/types.ts +52 -1
- package/src/extensibility/extensions/wrapper.ts +41 -5
- package/src/extensibility/hooks/index.ts +2 -1
- package/src/extensibility/plugins/legacy-pi-compat.ts +43 -13
- package/src/extensibility/plugins/loader.ts +30 -19
- package/src/extensibility/plugins/manager.ts +221 -90
- package/src/extensibility/shared-events.ts +1 -1
- package/src/extensibility/skills.ts +96 -15
- package/src/goals/guided-setup.ts +133 -0
- package/src/goals/state.ts +1 -1
- package/src/hindsight/transcript.ts +1 -1
- package/src/index.ts +5 -0
- package/src/internal-urls/docs-index.generated.ts +10 -10
- package/src/internal-urls/history-protocol.ts +1 -1
- package/src/internal-urls/local-protocol.ts +29 -7
- package/src/main.ts +27 -7
- package/src/mcp/startup-events.ts +21 -0
- package/src/mcp/transports/stdio.ts +2 -1
- package/src/memories/index.ts +146 -11
- package/src/memory-backend/local-backend.ts +11 -5
- package/src/mnemopi/backend.ts +1 -0
- package/src/mnemopi/config.ts +26 -10
- package/src/modes/acp/acp-agent.ts +3 -5
- package/src/modes/components/agent-hub.ts +49 -4
- package/src/modes/components/assistant-message.ts +4 -37
- package/src/modes/components/compaction-summary-message.ts +125 -26
- package/src/modes/components/custom-editor.test.ts +96 -0
- package/src/modes/components/custom-editor.ts +164 -8
- package/src/modes/components/session-selector.ts +1 -1
- package/src/modes/components/settings-defs.ts +7 -0
- package/src/modes/components/tool-execution.ts +82 -43
- package/src/modes/components/transcript-container.ts +70 -1
- package/src/modes/components/tree-selector.ts +1 -1
- package/src/modes/components/usage-row.ts +18 -0
- package/src/modes/components/user-message.ts +4 -2
- package/src/modes/controllers/command-controller.ts +14 -4
- package/src/modes/controllers/event-controller.ts +78 -11
- package/src/modes/controllers/extension-ui-controller.ts +6 -0
- package/src/modes/controllers/input-controller.ts +258 -27
- package/src/modes/controllers/selector-controller.ts +12 -2
- package/src/modes/gradient-highlight.ts +21 -9
- package/src/modes/image-references.ts +20 -0
- package/src/modes/interactive-mode.ts +286 -40
- package/src/modes/magic-keywords.ts +27 -5
- package/src/modes/rpc/rpc-mode.ts +146 -14
- package/src/modes/rpc/rpc-subagents.ts +2 -2
- package/src/modes/rpc/rpc-types.ts +8 -2
- package/src/modes/runtime-init.ts +28 -3
- package/src/modes/theme/theme.ts +98 -50
- package/src/modes/types.ts +6 -2
- package/src/modes/utils/hotkeys-markdown.ts +1 -1
- package/src/modes/utils/ui-helpers.ts +34 -6
- package/src/priority.json +5 -1
- package/src/prompts/agents/task.md +1 -0
- package/src/prompts/goals/guided-goal-interview.md +8 -0
- package/src/prompts/goals/guided-goal-system.md +12 -0
- package/src/prompts/memories/read-path.md +6 -0
- package/src/prompts/system/autolearn-guidance-learn.md +1 -0
- package/src/prompts/system/autolearn-guidance.md +7 -0
- package/src/prompts/system/autolearn-nudge.md +3 -0
- package/src/prompts/system/eager-task.md +7 -0
- package/src/prompts/system/eager-todo.md +11 -6
- package/src/prompts/system/subagent-system-prompt.md +4 -0
- package/src/prompts/system/system-prompt.md +10 -5
- package/src/prompts/system/title-marker-instruction.md +1 -0
- package/src/prompts/system/title-system-marker.md +16 -0
- package/src/prompts/tools/job.md +1 -0
- package/src/prompts/tools/learn.md +7 -0
- package/src/prompts/tools/manage-skill.md +9 -0
- package/src/prompts/tools/task.md +3 -0
- package/src/registry/agent-registry.ts +30 -0
- package/src/sdk.ts +88 -24
- package/src/secrets/obfuscator.ts +1 -1
- package/src/session/agent-session.ts +209 -87
- package/src/session/history-storage.ts +2 -2
- package/src/session/indexed-session-storage.ts +7 -17
- package/src/session/session-context.ts +352 -0
- package/src/session/session-entries.ts +194 -0
- package/src/session/session-listing.ts +588 -0
- package/src/session/session-loader.ts +106 -0
- package/src/session/session-manager.ts +933 -3145
- package/src/session/session-migrations.ts +78 -0
- package/src/session/session-paths.ts +193 -0
- package/src/session/session-persistence.ts +131 -0
- package/src/session/session-storage.ts +91 -50
- package/src/session/snapcompact-inline.ts +21 -1
- package/src/session/snapcompact-savings-journal.ts +113 -0
- package/src/session/tool-choice-queue.ts +23 -11
- package/src/slash-commands/builtin-registry.ts +25 -3
- package/src/stt/asr-client.ts +520 -0
- package/src/stt/asr-protocol.ts +65 -0
- package/src/stt/asr-worker.ts +790 -0
- package/src/stt/downloader.ts +107 -47
- package/src/stt/endpointer.ts +259 -0
- package/src/stt/index.ts +5 -1
- package/src/stt/models.ts +150 -0
- package/src/stt/recorder.ts +247 -60
- package/src/stt/stt-controller.ts +201 -22
- package/src/stt/transcriber.ts +37 -68
- package/src/stt/wav.ts +173 -0
- package/src/system-prompt.ts +8 -0
- package/src/task/agents.ts +1 -2
- package/src/task/executor.ts +49 -15
- package/src/task/index.ts +60 -6
- package/src/task/render.ts +83 -8
- package/src/task/types.ts +53 -0
- package/src/tools/ask.ts +8 -0
- package/src/tools/bash.ts +4 -3
- package/src/tools/eval-render.ts +4 -3
- package/src/tools/index.ts +40 -4
- package/src/tools/irc.ts +10 -2
- package/src/tools/job.ts +14 -2
- package/src/tools/learn.ts +144 -0
- package/src/tools/manage-skill.ts +104 -0
- package/src/tools/plan-mode-guard.ts +53 -19
- package/src/tools/renderers.ts +7 -11
- package/src/tools/ssh.ts +4 -3
- package/src/tools/todo.ts +1 -1
- package/src/tools/tts.ts +203 -92
- package/src/tools/write.ts +18 -2
- package/src/tts/downloader.ts +64 -0
- package/src/tts/index.ts +8 -0
- package/src/tts/models.ts +137 -0
- package/src/tts/player.ts +137 -0
- package/src/tts/runtime.ts +21 -0
- package/src/tts/streaming-player.ts +266 -0
- package/src/tts/tts-client.ts +647 -0
- package/src/tts/tts-protocol.ts +60 -0
- package/src/tts/tts-worker.ts +497 -0
- package/src/tts/vocalizer.ts +162 -0
- package/src/tts/wav.ts +58 -0
- package/src/utils/title-generator.ts +48 -5
- package/src/utils/tool-choice.ts +16 -0
- package/src/utils/tools-manager.test.ts +25 -0
- package/src/utils/tools-manager.ts +19 -1
- package/src/web/scrapers/github.ts +96 -0
- package/src/web/search/index.ts +13 -0
- package/src/web/search/providers/searxng.ts +13 -1
- package/dist/types/stt/setup.d.ts +0 -18
- package/src/stt/setup.ts +0 -52
- package/src/stt/transcribe.py +0 -70
|
@@ -0,0 +1,790 @@
|
|
|
1
|
+
import * as fs from "node:fs/promises";
|
|
2
|
+
import { createRequire } from "node:module";
|
|
3
|
+
import * as os from "node:os";
|
|
4
|
+
import * as path from "node:path";
|
|
5
|
+
import type {
|
|
6
|
+
AutomaticSpeechRecognitionOutput,
|
|
7
|
+
AutomaticSpeechRecognitionPipeline,
|
|
8
|
+
ProgressInfo,
|
|
9
|
+
} from "@huggingface/transformers";
|
|
10
|
+
import {
|
|
11
|
+
ensureRuntimeInstalled,
|
|
12
|
+
getTinyModelsCacheDir,
|
|
13
|
+
installRuntimeModuleResolver,
|
|
14
|
+
isCompiledBinary,
|
|
15
|
+
resolveRuntimeModule,
|
|
16
|
+
} from "@oh-my-pi/pi-utils";
|
|
17
|
+
import packageJson from "../../package.json" with { type: "json" };
|
|
18
|
+
import { resolveTinyModelDevicePreference, type TinyModelDevice, tinyModelDeviceLoadOrder } from "../tiny/device";
|
|
19
|
+
import { resolveTinyModelDtypeOverride, type TinyModelDtype } from "../tiny/dtype";
|
|
20
|
+
import type { SttProgressEvent, SttTransport, SttWorkerInbound } from "./asr-protocol";
|
|
21
|
+
import { type EndpointerEvent, StreamEndpointer } from "./endpointer";
|
|
22
|
+
import {
|
|
23
|
+
getSttModelSpec,
|
|
24
|
+
type SherpaSttModelSpec,
|
|
25
|
+
type SttModel,
|
|
26
|
+
type SttModelKey,
|
|
27
|
+
type TransformersSttModelSpec,
|
|
28
|
+
} from "./models";
|
|
29
|
+
|
|
30
|
+
/** Transformers.js pipeline task name requested for speech recognition. */
const ASR_TASK = "automatic-speech-recognition";
/** npm package name of the Transformers.js runtime. */
const TRANSFORMERS_PACKAGE = "@huggingface/transformers";
/** npm package name of the native sherpa-onnx runtime. */
const SHERPA_PACKAGE = "sherpa-onnx-node";
/** Transformers version taken from `PI_TINY_TRANSFORMERS_VERSION`; undefined when the variable is unset. */
const COMPILED_TRANSFORMERS_VERSION = process.env.PI_TINY_TRANSFORMERS_VERSION;

// Long-form Whisper decoding: audio is cut into 30s windows that overlap by 5s,
// so input of any length is transcribed without exceeding the 30s receptive field.
const CHUNK_LENGTH_S = 30;
const STRIDE_LENGTH_S = 5;

// Audio reaches the worker already resampled by the client to 16 kHz mono
// float32; sherpa-onnx is told this true input rate and resamples internally to
// its own feature config.
const ASR_SAMPLE_RATE = 16_000;

// Hub origin used to fetch raw sherpa-onnx model files
// (encoder / decoder / joiner / tokens).
const HF_RESOLVE_BASE = "https://huggingface.co";

// Download progress is coalesced so that streaming a multi-hundred-MB model file
// does not flood the IPC channel with one event per chunk.
const PROGRESS_EMIT_BYTES = 4_000_000;

/** `require` bound to this module's URL, used for source (non-compiled) resolution. */
const sourceRequire = createRequire(import.meta.url);

/** Device preference and optional dtype override, resolved once at module load. */
const sttModelDevicePreference = resolveTinyModelDevicePreference();
const sttModelDtypeOverride = resolveTinyModelDtypeOverride();
|
|
50
|
+
|
|
51
|
+
/**
 * The transformers.js ASR call options this worker sets.
 *
 * `Partial<AutomaticSpeechRecognitionConfig>` is not re-exported from the
 * package root, so only the fields actually passed are modelled here. The index
 * signature mirrors `GenerationFunctionParameters`, which keeps this shape
 * assignable to the pipeline's options parameter.
 */
interface AsrCallOptions {
	/** Window length, in seconds, used for chunked decoding. */
	chunk_length_s: number;
	/** Overlap, in seconds, between consecutive windows. */
	stride_length_s: number;
	/** Whether the pipeline should return timestamps. */
	return_timestamps: boolean;
	/** Optional task passed through to the pipeline. */
	task?: string;
	/** Optional language passed through to the pipeline. */
	language?: string;
	/** Any further generation parameters. */
	[key: string]: unknown;
}
|
|
65
|
+
|
|
66
|
+
/** Subset of the `@huggingface/transformers` module surface this worker touches. */
interface TransformersRuntime {
	/** Global runtime settings written once the module has been loaded. */
	env: {
		cacheDir?: string;
		allowLocalModels?: boolean;
		logLevel?: unknown;
	};
	/** Log-level constants; only `ERROR` is read here. */
	LogLevel: {
		ERROR: unknown;
	};
	/** Pipeline factory, narrowed to the speech-recognition task. */
	pipeline: (
		task: typeof ASR_TASK,
		model: string,
		options: {
			device: TinyModelDevice;
			dtype: TinyModelDtype;
			progress_callback: (info: ProgressInfo) => void;
		},
	) => Promise<AutomaticSpeechRecognitionPipeline>;
}
|
|
85
|
+
|
|
86
|
+
/** What the `sherpa-onnx-node` offline recognizer hands back after decoding. */
interface SherpaOfflineResult {
	/** Recognized text; may be absent. */
	text?: string;
}
|
|
90
|
+
|
|
91
|
+
/** sherpa-onnx offline stream: it is fed one waveform and then decoded. */
interface SherpaOfflineStream {
	/** Supply the samples together with the sample rate they were captured at. */
	acceptWaveform(audio: { samples: Float32Array; sampleRate: number }): void;
}
|
|
95
|
+
|
|
96
|
+
/** Subset of the sherpa-onnx offline recognizer used by this worker. */
interface SherpaOfflineRecognizer {
	/** Create a fresh stream to feed a waveform into. */
	createStream(): SherpaOfflineStream;
	/** Decode a stream and resolve with the recognition result. */
	decodeAsync(stream: SherpaOfflineStream): Promise<SherpaOfflineResult>;
}
|
|
100
|
+
|
|
101
|
+
/** Configuration handed to the `sherpa-onnx-node` offline recognizer (transducer family). */
interface SherpaOfflineConfig {
	modelConfig: {
		/** The three transducer model files. */
		transducer: { encoder: string; decoder: string; joiner: string };
		/** Tokens file. */
		tokens: string;
		modelType: string;
		numThreads: number;
		provider: string;
		debug: number;
	};
	decodingMethod: string;
}
|
|
113
|
+
|
|
114
|
+
/** The part of the native `sherpa-onnx-node` module this worker relies on. */
interface SherpaRuntime {
	OfflineRecognizer: {
		/** Build an offline recognizer from the given config. */
		createAsync(config: SherpaOfflineConfig): Promise<SherpaOfflineRecognizer>;
	};
}
|
|
120
|
+
|
|
121
|
+
/**
 * A warm model tagged with the engine that loaded it. The `engine` field
 * discriminates which handle (`pipeline` or `recognizer`) is present.
 */
type LoadedModel =
	| { engine: "transformers"; pipeline: AutomaticSpeechRecognitionPipeline }
	| { engine: "sherpa"; recognizer: SherpaOfflineRecognizer };

/** Per-tier cache of in-flight or completed model loads, keyed by model key. */
const models = new Map<SttModelKey, Promise<LoadedModel>>();
|
|
127
|
+
// All model inference is funnelled through one promise chain. The recognizers
// are not guaranteed reentrant and each tier has a single CPU-bound model, so
// batch transcribes and live-stream segment/partial decodes share this lock.
let modelLock = Promise.resolve();

/**
 * Queue `work` behind everything already scheduled on the model lock.
 *
 * @param work Async job to run once earlier jobs have settled.
 * @returns A promise that settles with the outcome of `work`. A failing job
 *   does not block later jobs: the lock itself always resolves.
 */
function runOnModel<T>(work: () => Promise<T>): Promise<T> {
	const pending = modelLock.then(work, work);
	// The lock tracks completion only; results and errors stay on `pending`.
	const settle = (): undefined => undefined;
	modelLock = pending.then(settle, settle);
	return pending;
}
|
|
139
|
+
/** Memoized load of the Transformers.js module; `null` until first requested and again after a failed load. */
let transformersRuntime: Promise<TransformersRuntime> | null = null;
/** Memoized load of the sherpa-onnx module; `null` until first requested and again after a failed load. */
let sherpaRuntime: Promise<SherpaRuntime> | null = null;
|
|
141
|
+
|
|
142
|
+
/** Memo slot for the resolved transformers version spec. */
let cachedTransformersVersionSpec: string | undefined;

/**
 * Determine which `@huggingface/transformers` version spec applies.
 *
 * The spec is read from this package's manifest (`optionalDependencies` first,
 * then `dependencies`). A `catalog:` spec is not usable as-is, so it is replaced
 * by `PI_TINY_TRANSFORMERS_VERSION` when set, otherwise by the version found in
 * the installed package's own manifest.
 *
 * @throws Error when the manifest does not list the package at all.
 */
function resolveTransformersVersionSpec(): string {
	const { optionalDependencies, dependencies } = packageJson as {
		optionalDependencies?: Record<string, string>;
		dependencies?: Record<string, string>;
	};
	const declared = optionalDependencies?.[TRANSFORMERS_PACKAGE] ?? dependencies?.[TRANSFORMERS_PACKAGE];
	if (!declared) {
		throw new Error(`${TRANSFORMERS_PACKAGE} is missing from package.json optionalDependencies`);
	}

	const isCatalogSpec = declared.startsWith("catalog:");
	if (!isCatalogSpec) return declared;
	if (COMPILED_TRANSFORMERS_VERSION) return COMPILED_TRANSFORMERS_VERSION;

	const { version } = sourceRequire(`${TRANSFORMERS_PACKAGE}/package.json`) as { version: string };
	return version;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Memoized accessor for the transformers version spec.
 *
 * Resolution is deferred until first use because the `catalog:` case has to
 * `require` the installed package manifest. It is only needed on the
 * compiled-binary runtime-install path (a real transcribe or download), so
 * loading this worker for a smoke ping never triggers it.
 */
function getTransformersVersionSpec(): string {
	if (cachedTransformersVersionSpec == null) {
		cachedTransformersVersionSpec = resolveTransformersVersionSpec();
	}
	return cachedTransformersVersionSpec;
}
|
|
167
|
+
|
|
168
|
+
/** Memo slot for the resolved sherpa-onnx version spec. */
let cachedSherpaVersionSpec: string | undefined;

/**
 * Read the `sherpa-onnx-node` version spec from this package's manifest,
 * preferring `optionalDependencies` over `dependencies`.
 *
 * @throws Error when the manifest does not list the package at all.
 */
function resolveSherpaVersionSpec(): string {
	const manifest = packageJson as {
		optionalDependencies?: Record<string, string>;
		dependencies?: Record<string, string>;
	};
	const fromOptional = manifest.optionalDependencies?.[SHERPA_PACKAGE];
	const versionSpec = fromOptional ?? manifest.dependencies?.[SHERPA_PACKAGE];
	if (versionSpec) return versionSpec;
	throw new Error(`${SHERPA_PACKAGE} is missing from package.json optionalDependencies`);
}
|
|
178
|
+
|
|
179
|
+
/** Memoized accessor for the sherpa-onnx version spec; resolved on first call. */
function getSherpaVersionSpec(): string {
	if (cachedSherpaVersionSpec == null) {
		cachedSherpaVersionSpec = resolveSherpaVersionSpec();
	}
	return cachedSherpaVersionSpec;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Render a thrown value as verbose text.
 *
 * @returns For `Error` instances the stack, or the message when no stack is
 *   set; any other value is converted with `String`.
 */
function errorText(error: unknown): string {
	if (!(error instanceof Error)) return String(error);
	return error.stack ?? error.message;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Render a thrown value as a short message.
 *
 * @returns The `message` of an `Error`; any other value converted with `String`.
 */
function errorMessage(error: unknown): string {
	if (error instanceof Error) {
		return error.message;
	}
	return String(error);
}
|
|
191
|
+
|
|
192
|
+
/**
 * Forward a log record over the worker transport.
 *
 * @param transport Channel the record is sent on.
 * @param level Severity of the record.
 * @param msg Log message.
 * @param meta Optional structured fields sent along with the message.
 */
function sendLog(
	transport: SttTransport,
	level: "debug" | "warn" | "error",
	msg: string,
	meta?: Record<string, unknown>,
): void {
	const record = { type: "log" as const, level, msg, meta };
	transport.send(record);
}
|
|
200
|
+
|
|
201
|
+
/**
 * Directory holding the side-installed Transformers.js runtime.
 *
 * It is a sibling of the tiny-models cache dir, under `stt-runtime`, and is
 * named after the version spec with every character outside `[A-Za-z0-9._-]`
 * replaced by `_`.
 */
function getSttRuntimeDir(): string {
	const versionKey = getTransformersVersionSpec().replace(/[^A-Za-z0-9._-]/g, "_");
	const cacheParent = path.dirname(getTinyModelsCacheDir());
	return path.join(cacheParent, "stt-runtime", `transformers-${versionKey}`);
}
|
|
205
|
+
|
|
206
|
+
/**
 * Directory holding the side-installed sherpa-onnx runtime.
 *
 * It is a sibling of the tiny-models cache dir, under `stt-runtime`, and is
 * named after the version spec with every character outside `[A-Za-z0-9._-]`
 * replaced by `_`.
 */
function getSherpaRuntimeDir(): string {
	const versionKey = getSherpaVersionSpec().replace(/[^A-Za-z0-9._-]/g, "_");
	const cacheParent = path.dirname(getTinyModelsCacheDir());
	return path.join(cacheParent, "stt-runtime", `sherpa-${versionKey}`);
}
|
|
210
|
+
|
|
211
|
+
/**
 * Report a runtime-install phase to the client as a progress message.
 *
 * @param transport Channel the message is sent on.
 * @param requestId Id of the request the progress belongs to.
 * @param modelKey Model the runtime is being installed for.
 * @param status Install phase being reported.
 * @param name Label placed in the event's `name` field.
 */
function sendRuntimeInstallProgress(
	transport: SttTransport,
	requestId: string,
	modelKey: SttModelKey,
	status: "initiate" | "download" | "done",
	name: string,
): void {
	const event = { modelKey, status, name };
	transport.send({ type: "progress", id: requestId, event });
}
|
|
220
|
+
|
|
221
|
+
/**
 * Make a freshly installed compiled runtime loadable.
 *
 * Two things happen before the entrypoint is resolved: `sharp` is replaced by an
 * empty stub module (the speech pipeline is audio-only, so the native image
 * codec is dead weight), and the module resolver is patched so that
 * Transformers.js's bare requires resolve against the runtime's `node_modules`.
 *
 * @param runtimeDir Root of the installed runtime.
 * @returns Absolute path of the Transformers.js entrypoint to `require`.
 * @throws Error when the entrypoint cannot be resolved.
 */
async function prepareCompiledRuntime(runtimeDir: string): Promise<string> {
	const runtimeNodeModules = path.join(runtimeDir, "node_modules");
	const stubPath = path.join(runtimeDir, "omp-sharp-stub.cjs");

	// The stub must be on disk before the resolver is pointed at it.
	await Bun.write(stubPath, "module.exports = {};\n");
	installRuntimeModuleResolver({ runtimeNodeModules, stubs: { sharp: stubPath } });

	const entrypoint = resolveRuntimeModule(runtimeNodeModules, TRANSFORMERS_PACKAGE);
	if (entrypoint) return entrypoint;
	throw new Error(`Unable to resolve ${TRANSFORMERS_PACKAGE} in compiled runtime at ${runtimeNodeModules}`);
}
|
|
236
|
+
|
|
237
|
+
/**
 * Apply this worker's global settings to a loaded Transformers.js module: the
 * cache directory is set to the tiny-models cache, local models are disallowed,
 * and logging is set to the `ERROR` level.
 *
 * @returns The same module object, for chaining.
 */
function configureTransformers(transformers: TransformersRuntime): TransformersRuntime {
	const { env, LogLevel } = transformers;
	env.cacheDir = getTinyModelsCacheDir();
	env.allowLocalModels = false;
	env.logLevel = LogLevel.ERROR;
	return transformers;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Load and configure the Transformers.js module, once per process.
 *
 * When running from source the package is required directly. In a compiled
 * binary it is first installed into a side runtime dir (install phases are
 * reported as progress for the given request), then prepared and required from
 * there. The in-flight promise is memoized; a failed load clears the memo so a
 * later call can retry.
 *
 * @param transport Channel used for install-progress messages.
 * @param requestId Request the progress messages are attributed to.
 * @param modelKey Model the runtime is being loaded for.
 */
async function loadTransformers(
	transport: SttTransport,
	requestId: string,
	modelKey: SttModelKey,
): Promise<TransformersRuntime> {
	if (transformersRuntime) return transformersRuntime;

	const load = async (): Promise<TransformersRuntime> => {
		if (!isCompiledBinary()) {
			return configureTransformers(sourceRequire(TRANSFORMERS_PACKAGE) as TransformersRuntime);
		}
		const installedDir = await ensureRuntimeInstalled({
			runtimeDir: getSttRuntimeDir(),
			install: {
				dependencies: { [TRANSFORMERS_PACKAGE]: getTransformersVersionSpec() },
				trustedDependencies: ["onnxruntime-node"],
			},
			probePackage: TRANSFORMERS_PACKAGE,
			onPhase: phase => {
				const label = `${TRANSFORMERS_PACKAGE}@${getTransformersVersionSpec()}`;
				return sendRuntimeInstallProgress(transport, requestId, modelKey, phase, label);
			},
		});
		const entrypoint = await prepareCompiledRuntime(installedDir);
		const runtimeRequire = createRequire(entrypoint);
		return configureTransformers(runtimeRequire(entrypoint) as TransformersRuntime);
	};

	// The reset stays on the promise chain (not in a try/catch inside `load`) so
	// it always runs after the assignment below, even for an early throw.
	transformersRuntime = load().catch(error => {
		transformersRuntime = null;
		throw error;
	});
	return transformersRuntime;
}
|
|
277
|
+
|
|
278
|
+
/**
 * Load the native `sherpa-onnx-node` module, once per process.
 *
 * When running from source the package is required directly. In a compiled
 * binary the addon (with its per-platform prebuilt `sherpa-onnx.node` and
 * bundled onnxruntime dylibs) is installed into a side runtime dir. The addon
 * locates its native library relative to its own location, so a plain
 * `createRequire` of the entry suffices; no resolver patch or bare-require
 * stubbing is needed. A failed load clears the memo so a later call can retry.
 *
 * @param transport Channel used for install-progress messages.
 * @param requestId Request the progress messages are attributed to.
 * @param modelKey Model the runtime is being loaded for.
 * @throws Error when the installed entrypoint cannot be resolved.
 */
async function loadSherpaRuntime(
	transport: SttTransport,
	requestId: string,
	modelKey: SttModelKey,
): Promise<SherpaRuntime> {
	if (sherpaRuntime) return sherpaRuntime;

	const load = async (): Promise<SherpaRuntime> => {
		if (!isCompiledBinary()) {
			return sourceRequire(SHERPA_PACKAGE) as SherpaRuntime;
		}
		const installedDir = await ensureRuntimeInstalled({
			runtimeDir: getSherpaRuntimeDir(),
			install: { dependencies: { [SHERPA_PACKAGE]: getSherpaVersionSpec() } },
			probePackage: SHERPA_PACKAGE,
			onPhase: phase => {
				const label = `${SHERPA_PACKAGE}@${getSherpaVersionSpec()}`;
				return sendRuntimeInstallProgress(transport, requestId, modelKey, phase, label);
			},
		});
		const runtimeNodeModules = path.join(installedDir, "node_modules");
		const entrypoint = resolveRuntimeModule(runtimeNodeModules, SHERPA_PACKAGE);
		if (!entrypoint) {
			throw new Error(`Unable to resolve ${SHERPA_PACKAGE} in compiled runtime at ${runtimeNodeModules}`);
		}
		const runtimeRequire = createRequire(entrypoint);
		return runtimeRequire(entrypoint) as SherpaRuntime;
	};

	// The reset stays on the promise chain so it always runs after the
	// assignment below, even when `load` throws before its first await.
	sherpaRuntime = load().catch(error => {
		sherpaRuntime = null;
		throw error;
	});
	return sherpaRuntime;
}
|
|
317
|
+
|
|
318
|
+
/**
 * Translate a Transformers.js progress record into the worker's progress event,
 * copying only the fields relevant to each status and tagging it with the model.
 *
 * @param modelKey Model the progress belongs to.
 * @param info Progress record received from the pipeline loader.
 */
function toProgressEvent(modelKey: SttModelKey, info: ProgressInfo): SttProgressEvent {
	switch (info.status) {
		case "ready": {
			const { status, task, model } = info;
			return { modelKey, status, task, model };
		}
		case "progress_total": {
			const { status, name, progress, loaded, total, files } = info;
			return { modelKey, status, name, progress, loaded, total, files };
		}
		case "progress": {
			const { status, name, file, progress, loaded, total } = info;
			return { modelKey, status, name, file, progress, loaded, total };
		}
		default: {
			// Every remaining status carries just a name and a file.
			const { status, name, file } = info;
			return { modelKey, status, name, file };
		}
	}
}
|
|
346
|
+
|
|
347
|
+
/**
 * Convert a loader progress record and send it as a progress message.
 *
 * @param transport Channel the message is sent on.
 * @param id Request the progress belongs to.
 * @param modelKey Model the progress belongs to.
 * @param info Progress record received from the pipeline loader.
 */
function sendProgress(transport: SttTransport, id: string, modelKey: SttModelKey, info: ProgressInfo): void {
	const event = toProgressEvent(modelKey, info);
	transport.send({ type: "progress", id, event });
}
|
|
350
|
+
|
|
351
|
+
/**
 * Build the speech-recognition pipeline for `spec.repo` on one specific device.
 *
 * The dtype override, when present, wins over the spec's dtype. Loader progress
 * is forwarded to the client under the given request id.
 */
async function loadPipelineOnDevice(
	transformers: TransformersRuntime,
	spec: TransformersSttModelSpec,
	modelKey: SttModelKey,
	transport: SttTransport,
	requestId: string,
	device: TinyModelDevice,
): Promise<AutomaticSpeechRecognitionPipeline> {
	const dtype = sttModelDtypeOverride ?? spec.dtype;
	const forwardProgress = (info: ProgressInfo): void => sendProgress(transport, requestId, modelKey, info);
	return transformers.pipeline(ASR_TASK, spec.repo, {
		device,
		dtype,
		progress_callback: forwardProgress,
	});
}
|
|
365
|
+
|
|
366
|
+
/**
 * Load the pipeline on the first device that works.
 *
 * Devices are tried in the order given by `tinyModelDeviceLoadOrder`. A warning
 * is logged up front when the first candidate differs from the requested
 * device, and again each time a device fails and the next one is tried. The
 * failure of the last candidate is rethrown.
 *
 * @returns The pipeline together with the device it was loaded on.
 * @throws Error when the candidate list is empty, or the last device's error.
 */
async function loadPipelineWithDeviceFallback(
	transformers: TransformersRuntime,
	spec: TransformersSttModelSpec,
	modelKey: SttModelKey,
	transport: SttTransport,
	requestId: string,
): Promise<{ pipeline: AutomaticSpeechRecognitionPipeline; device: TinyModelDevice }> {
	const candidates = tinyModelDeviceLoadOrder(sttModelDevicePreference);
	const requestedDevice = sttModelDevicePreference.device;
	if (candidates[0] !== requestedDevice) {
		sendLog(transport, "warn", "stt: requested device is unsafe in the worker; using CPU", {
			modelKey,
			repo: spec.repo,
			requestedDevice,
			device: candidates[0],
		});
	}

	const lastIndex = candidates.length - 1;
	for (let index = 0; index <= lastIndex; index += 1) {
		const device = candidates[index]!;
		let pipeline: AutomaticSpeechRecognitionPipeline;
		try {
			pipeline = await loadPipelineOnDevice(transformers, spec, modelKey, transport, requestId, device);
		} catch (error) {
			// Nothing left to fall back to: surface the failure as-is.
			if (index === lastIndex) throw error;
			sendLog(transport, "warn", "stt: accelerated device failed; falling back", {
				modelKey,
				repo: spec.repo,
				device,
				fallbackDevice: candidates[index + 1]!,
				error: errorMessage(error),
			});
			continue;
		}
		return { pipeline, device };
	}
	throw new Error("No stt model devices configured");
}
|
|
403
|
+
|
|
404
|
+
/**
 * Load a transformers-engine model: obtain the runtime, build the pipeline
 * (with device fallback), log the load timing, and wrap the pipeline as a
 * `LoadedModel`.
 *
 * The timer starts after the runtime is available, so `elapsedMs` covers the
 * pipeline load only.
 */
async function loadTransformersModel(
	spec: TransformersSttModelSpec,
	modelKey: SttModelKey,
	transport: SttTransport,
	requestId: string,
): Promise<LoadedModel> {
	const transformers = await loadTransformers(transport, requestId, modelKey);
	const loadStart = performance.now();
	const result = await loadPipelineWithDeviceFallback(transformers, spec, modelKey, transport, requestId);
	const elapsedMs = Math.round(performance.now() - loadStart);
	sendLog(transport, "debug", "stt: local model loaded", {
		modelKey,
		repo: spec.repo,
		engine: "transformers",
		device: result.device,
		requestedDevice: sttModelDevicePreference.device,
		dtype: sttModelDtypeOverride ?? spec.dtype,
		elapsedMs,
	});
	return { engine: "transformers", pipeline: result.pipeline };
}
|
|
430
|
+
|
|
431
|
+
/**
 * Stream a single sherpa-onnx model file from the Hub into the cache, writing to
 * a `.part` sidecar and renaming on completion so an interrupted fetch never
 * reads as cached. Emits coalesced per-file progress for the aggregating client.
 *
 * On any failure the response stream is cancelled, the file handle is closed,
 * and the `.part` sidecar is removed before the original error is rethrown.
 *
 * @param repo Hub repository id the file is resolved from.
 * @param filename File name inside the repository (also used as the progress `file`).
 * @param dest Final path of the file; the download goes to `${dest}.part` first.
 * @param modelKey Model key echoed in every progress event.
 * @param transport Channel the progress events are sent on.
 * @param requestId Id of the request the progress events belong to.
 * @throws Error when the HTTP response is not OK or has no body; any I/O or
 *   stream error raised while downloading.
 */
async function downloadSherpaFile(
	repo: string,
	filename: string,
	dest: string,
	modelKey: SttModelKey,
	transport: SttTransport,
	requestId: string,
): Promise<void> {
	const url = `${HF_RESOLVE_BASE}/${repo}/resolve/main/${filename}`;
	const response = await fetch(url, { redirect: "follow" });
	if (!response.ok || !response.body) {
		// An error response may still carry a body; cancel it so the connection is released.
		await response.body?.cancel().catch(() => undefined);
		throw new Error(`Failed to download ${filename} (${repo}): HTTP ${response.status}`);
	}
	const name = `${repo}/${filename}`;
	const total = Number(response.headers.get("content-length") ?? 0);
	transport.send({
		type: "progress",
		id: requestId,
		event: { modelKey, status: "download", name, file: filename },
	});

	const part = `${dest}.part`;
	let loaded = 0;
	let lastEmitted = 0;
	const emitProgress = (): void => {
		lastEmitted = loaded;
		transport.send({
			type: "progress",
			id: requestId,
			event: {
				modelKey,
				status: "progress",
				name,
				file: filename,
				loaded,
				// Never report a total below the bytes already received; this also
				// covers a missing or unparsable content-length header.
				total: total >= loaded ? total : loaded,
			},
		});
	};

	const reader = response.body.getReader();
	let handle: Awaited<ReturnType<typeof fs.open>> | undefined;
	try {
		handle = await fs.open(part, "w");
		for (;;) {
			const { done, value } = await reader.read();
			if (done) break;
			if (!value) continue;
			await handle.write(value);
			loaded += value.byteLength;
			if (loaded - lastEmitted >= PROGRESS_EMIT_BYTES || (total > 0 && loaded >= total)) {
				emitProgress();
			}
		}
		// Without a usable content-length the tail below the emit threshold would
		// otherwise go unreported, leaving the client short of completion.
		if (loaded > lastEmitted) emitProgress();
		await handle.close();
		handle = undefined;
		await fs.rename(part, dest);
	} catch (error) {
		// Best-effort cleanup; the original failure is what the caller needs to see.
		await reader.cancel().catch(() => undefined);
		if (handle) await handle.close().catch(() => undefined);
		await fs.rm(part, { force: true }).catch(() => undefined);
		throw error;
	}
}
|
|
488
|
+
|
|
489
|
+
/**
 * Make sure every file listed in `spec.files` exists, non-empty, under the
 * model's cache directory, downloading whichever ones are missing.
 *
 * Files are checked and fetched one at a time. A file that cannot be stat'ed
 * or has size zero is treated as missing.
 *
 * @returns The on-disk path of each role's file.
 */
async function ensureSherpaModelFiles(
	spec: SherpaSttModelSpec,
	modelKey: SttModelKey,
	transport: SttTransport,
	requestId: string,
): Promise<{ encoder: string; decoder: string; joiner: string; tokens: string }> {
	const cacheDir = path.join(getTinyModelsCacheDir(), spec.repo);
	await fs.mkdir(cacheDir, { recursive: true });
	const resolved = {} as { encoder: string; decoder: string; joiner: string; tokens: string };
	for (const role in spec.files) {
		const key = role as keyof typeof spec.files;
		const filename = spec.files[key];
		const dest = path.join(cacheDir, filename);
		let present: boolean;
		try {
			const stats = await fs.stat(dest);
			present = stats.size > 0;
		} catch {
			present = false;
		}
		if (!present) {
			await downloadSherpaFile(spec.repo, filename, dest, modelKey, transport, requestId);
		}
		resolved[key] = dest;
	}
	return resolved;
}
|
|
515
|
+
|
|
516
|
+
/**
 * Load a sherpa-engine model: obtain the runtime, make sure the model files
 * are cached, create an offline transducer recognizer on the CPU provider with
 * greedy-search decoding, and log the load timing.
 *
 * The timer starts after the files are available, so `elapsedMs` covers
 * recognizer creation only.
 */
async function loadSherpaModel(
	spec: SherpaSttModelSpec,
	modelKey: SttModelKey,
	transport: SttTransport,
	requestId: string,
): Promise<LoadedModel> {
	const runtime = await loadSherpaRuntime(transport, requestId, modelKey);
	const { encoder, decoder, joiner, tokens } = await ensureSherpaModelFiles(spec, modelKey, transport, requestId);
	const loadStart = performance.now();
	// Clamp the thread count to the range 1..4.
	const numThreads = Math.min(4, Math.max(1, os.availableParallelism()));
	const recognizer = await runtime.OfflineRecognizer.createAsync({
		modelConfig: {
			transducer: { encoder, decoder, joiner },
			tokens,
			modelType: spec.modelType,
			numThreads,
			provider: "cpu",
			debug: 0,
		},
		decodingMethod: "greedy_search",
	});
	const elapsedMs = Math.round(performance.now() - loadStart);
	sendLog(transport, "debug", "stt: local model loaded", {
		modelKey,
		repo: spec.repo,
		engine: "sherpa",
		provider: "cpu",
		numThreads,
		elapsedMs,
	});
	return { engine: "sherpa", recognizer };
}
|
|
547
|
+
|
|
548
|
+
/**
 * Return the loaded model for `modelKey`, starting a load only when none is
 * cached in `models`.
 *
 * The cache stores the in-flight promise, so callers that arrive during a load
 * share it. Every caller gets a `ready` progress event under its own
 * `requestId` once the model resolves. A failed load is evicted from the cache
 * so a later call can retry.
 *
 * @throws Error when `modelKey` has no spec; otherwise whatever the engine loader rejects with.
 */
async function loadModel(modelKey: SttModelKey, transport: SttTransport, requestId: string): Promise<LoadedModel> {
	const spec = getSttModelSpec(modelKey);
	if (!spec) throw new Error(`Unknown stt model: ${modelKey}`);

	const announceReady = (): void => {
		transport.send({
			type: "progress",
			id: requestId,
			event: { modelKey, status: "ready", task: ASR_TASK, model: spec.repo },
		});
	};

	const cached = models.get(modelKey);
	if (cached) {
		// Side chain only: the caller still observes the cached promise's own outcome.
		void cached.then(() => announceReady()).catch(() => undefined);
		return cached;
	}

	const loading =
		spec.engine === "sherpa"
			? loadSherpaModel(spec, modelKey, transport, requestId)
			: loadTransformersModel(spec, modelKey, transport, requestId);
	const onLoaded = (model: LoadedModel): LoadedModel => {
		announceReady();
		return model;
	};
	const onFailed = (error: unknown): never => {
		models.delete(modelKey);
		throw error;
	};
	const loaded = loading.then(onLoaded, onFailed);
	models.set(modelKey, loaded);
	return loaded;
}
|
|
586
|
+
|
|
587
|
+
/**
 * Transcribe one audio buffer with an already-loaded model and return the
 * trimmed text (empty string when the engine reports no text).
 *
 * The transformers engine runs the pipeline with the module's chunk and stride
 * lengths; `task` and `language` are only passed for models that are not
 * English-only. The sherpa engine feeds the samples into a fresh stream at
 * `ASR_SAMPLE_RATE`.
 */
async function decodeSegment(
	model: LoadedModel,
	spec: SttModel,
	audio: Float32Array,
	language: string | undefined,
): Promise<string> {
	if (model.engine !== "sherpa") {
		const options: AsrCallOptions = {
			chunk_length_s: CHUNK_LENGTH_S,
			stride_length_s: STRIDE_LENGTH_S,
			return_timestamps: false,
		};
		// English-only Whisper checkpoints reject `language`/`task`; multilingual ones
		// take the configured source language (auto-detected when omitted).
		if (!spec.englishOnly) {
			options.task = "transcribe";
			if (language) options.language = language;
		}
		const output = (await model.pipeline(audio, options)) as AutomaticSpeechRecognitionOutput;
		return (output.text ?? "").trim();
	}

	const stream = model.recognizer.createStream();
	stream.acceptWaveform({ samples: audio, sampleRate: ASR_SAMPLE_RATE });
	const result = await model.recognizer.decodeAsync(stream);
	return (result.text ?? "").trim();
}
|
|
613
|
+
|
|
614
|
+
/**
 * Transcribe a complete audio buffer: resolve the model spec, load (or reuse)
 * the model, then decode through `runOnModel`.
 *
 * @throws Error when `modelKey` has no spec; otherwise load or decode failures.
 */
async function transcribeAudio(
	transport: SttTransport,
	requestId: string,
	modelKey: SttModelKey,
	audio: Float32Array,
	language: string | undefined,
): Promise<string> {
	const spec = getSttModelSpec(modelKey);
	if (!spec) {
		throw new Error(`Unknown stt model: ${modelKey}`);
	}
	const model = await loadModel(modelKey, transport, requestId);
	const decode = () => decodeSegment(model, spec, audio, language);
	return runOnModel(decode);
}
|
|
626
|
+
|
|
627
|
+
/**
 * Serve a one-shot `download` or `transcribe` request and reply with
 * `downloaded` or `transcription`. Any failure is reported to the client as an
 * `error` message for the same request id instead of being thrown.
 */
async function handleBatchRequest(
	transport: SttTransport,
	request: Extract<SttWorkerInbound, { type: "transcribe" | "download" }>,
): Promise<void> {
	try {
		if (request.type !== "download") {
			const text = await transcribeAudio(
				transport,
				request.id,
				request.modelKey,
				request.audio,
				request.language,
			);
			transport.send({ type: "transcription", id: request.id, text });
			return;
		}
		// A download request only needs the model loaded; there is no audio to decode.
		await loadModel(request.modelKey, transport, request.id);
		transport.send({ type: "downloaded", id: request.id });
	} catch (error) {
		transport.send({ type: "error", id: request.id, error: errorText(error) });
	}
}
|
|
643
|
+
|
|
644
|
+
// ── Live streaming sessions ─────────────────────────────────────────
|
|
645
|
+
|
|
646
|
+
/** State for one in-flight {@link StreamEndpointer}-driven streaming session. */
interface StreamingSession {
	/** Request id; used as the key in `sessions` and as the `id` of every outbound message. */
	id: string;
	/** Spec of the model used for this session's decodes. */
	spec: SttModel;
	/** Source language passed to each decode, when one was given. */
	language: string | undefined;
	/** Model load started when the session was created; awaited by the pump. */
	model: Promise<LoadedModel>;
	/** Splits incoming audio into the segment/partial events the pump consumes. */
	endpointer: StreamEndpointer;

	/** Finalized segments awaiting decode, in order. */
	segmentQueue: Float32Array[];
	/** Latest in-progress segment audio awaiting a volatile partial decode (coalesced). */
	pendingPartial: Float32Array | null;
	/** Committed segment transcripts, joined for the final result. */
	committed: string[];
	/** Index assigned to the next `segment` message sent to the client. */
	segmentIndex: number;

	/** True while a pump is running, so a second pump call returns immediately. */
	pumping: boolean;
	/** Set by `stream_cancel`; stops decoding and suppresses further output. */
	cancelled: boolean;
	/** Set by `stream_stop`; `stream_done` is sent once pending work has drained. */
	ended: boolean;
}
|
|
664
|
+
|
|
665
|
+
/** Active streaming sessions, keyed by the id of their `stream_start` request. */
const sessions: Map<string, StreamingSession> = new Map();
|
|
666
|
+
|
|
667
|
+
/**
 * Register a new streaming session for a `stream_start` request and begin
 * loading its model.
 *
 * An unknown model key is reported to the client as an `error` and no session
 * is created. Otherwise the pump is started right away, before any audio
 * arrives, so that the model-load promise always has a handler. A load failure
 * is then reported to the client as soon as it happens instead of surfacing as
 * an unhandled rejection, or staying silent until the first audio chunk.
 *
 * @param transport Channel used for the error reply and all session output.
 * @param request The `stream_start` message (id, model key, optional language).
 */
function startStreamingSession(
	transport: SttTransport,
	request: Extract<SttWorkerInbound, { type: "stream_start" }>,
): void {
	const spec = getSttModelSpec(request.modelKey);
	if (!spec) {
		transport.send({ type: "error", id: request.id, error: `Unknown stt model: ${request.modelKey}` });
		return;
	}
	const session: StreamingSession = {
		id: request.id,
		spec,
		language: request.language,
		model: loadModel(request.modelKey, transport, request.id),
		endpointer: new StreamEndpointer(),
		segmentQueue: [],
		pendingPartial: null,
		committed: [],
		segmentIndex: 0,
		pumping: false,
		cancelled: false,
		ended: false,
	};
	sessions.set(request.id, session);
	// With nothing queued yet the pump just waits for the model. Audio that
	// arrives meanwhile is queued and drained by this same pump once it resolves.
	void pumpSession(session, transport);
}
|
|
691
|
+
|
|
692
|
+
/**
 * File endpointer events into the session: each `segment` event's audio is
 * appended to the decode queue, and any other event's audio replaces the
 * pending partial (only the most recent one is kept).
 */
function ingestStreamEvents(session: StreamingSession, events: EndpointerEvent[]): void {
	events.forEach(event => {
		if (event.kind === "segment") {
			session.segmentQueue.push(event.audio);
			return;
		}
		session.pendingPartial = event.audio;
	});
}
|
|
698
|
+
|
|
699
|
+
/**
 * Work through everything a session has pending. Finalized segments are decoded
 * first and committed in order; only when none remain is the single coalesced
 * partial preview decoded. The `pumping` flag makes a concurrent call return
 * immediately, and audio queued during a decode is picked up on the next loop
 * iteration.
 *
 * Once the session has ended and nothing is pending, `stream_done` is sent with
 * the committed transcripts joined by spaces and the session is removed. A
 * failure removes the session and is reported as `error` unless it was cancelled.
 */
async function pumpSession(session: StreamingSession, transport: SttTransport): Promise<void> {
	if (session.pumping) return;
	session.pumping = true;
	try {
		const model = await session.model;
		const decode = (samples: Float32Array) =>
			runOnModel(() => decodeSegment(model, session.spec, samples, session.language));

		while (!session.cancelled) {
			const finalized = session.segmentQueue.shift();
			if (finalized !== undefined) {
				// A fresh segment supersedes any queued preview for the prior one.
				session.pendingPartial = null;
				const transcript = await decode(finalized);
				if (session.cancelled) return;
				if (transcript.length > 0) {
					session.committed.push(transcript);
					const index = session.segmentIndex;
					session.segmentIndex += 1;
					transport.send({ type: "segment", id: session.id, index, text: transcript });
				}
				continue;
			}

			const preview = session.pendingPartial;
			if (!preview) break;
			session.pendingPartial = null;
			const transcript = await decode(preview);
			if (session.cancelled) return;
			// Skip a now-stale preview if a segment finalized mid-decode.
			if (transcript.length > 0 && session.segmentQueue.length === 0) {
				transport.send({ type: "partial", id: session.id, text: transcript });
			}
		}

		const drained = session.segmentQueue.length === 0 && !session.pendingPartial;
		if (session.ended && !session.cancelled && drained) {
			transport.send({ type: "stream_done", id: session.id, text: session.committed.join(" ") });
			sessions.delete(session.id);
		}
	} catch (error) {
		if (!session.cancelled) {
			transport.send({ type: "error", id: session.id, error: errorText(error) });
		}
		sessions.delete(session.id);
	} finally {
		session.pumping = false;
	}
}
|
|
746
|
+
|
|
747
|
+
/**
 * Route one streaming-protocol message.
 *
 * `stream_start` creates a session. The other message types act on the existing
 * session and are ignored when it is unknown or already cancelled:
 * - `stream_cancel` marks the session cancelled and removes it;
 * - `stream_stop` marks it ended, drops the pending preview, and flushes the endpointer;
 * - `stream_audio` feeds the audio to the endpointer.
 * Stop and audio both kick the pump afterwards.
 */
function handleStreamMessage(
	transport: SttTransport,
	message: Extract<SttWorkerInbound, { type: "stream_start" | "stream_audio" | "stream_stop" | "stream_cancel" }>,
): void {
	if (message.type === "stream_start") {
		startStreamingSession(transport, message);
		return;
	}

	const session = sessions.get(message.id);
	if (!session || session.cancelled) return;

	if (message.type === "stream_cancel") {
		session.cancelled = true;
		sessions.delete(message.id);
		return;
	}

	if (message.type === "stream_stop") {
		session.ended = true;
		session.pendingPartial = null;
		ingestStreamEvents(session, session.endpointer.flush());
	} else {
		ingestStreamEvents(session, session.endpointer.push(message.audio));
	}
	void pumpSession(session, transport);
}
|
|
774
|
+
|
|
775
|
+
/**
 * Attach the worker's message handler to `transport`.
 *
 * `ping` is answered with `pong`. `transcribe` and `download` go to the batch
 * handler. Every other message type goes to the streaming handler.
 */
export function startSttWorker(transport: SttTransport): void {
	transport.onMessage(message => {
		if (message.type === "ping") {
			transport.send({ type: "pong", id: message.id });
			return;
		}
		if (message.type === "transcribe" || message.type === "download") {
			void handleBatchRequest(transport, message);
			return;
		}
		handleStreamMessage(transport, message);
	});
}
|