tjbot-ce 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +382 -0
- package/dist/camera/camera.d.ts +62 -0
- package/dist/camera/camera.d.ts.map +1 -0
- package/dist/camera/camera.js +155 -0
- package/dist/camera/camera.js.map +1 -0
- package/dist/camera/index.d.ts +18 -0
- package/dist/camera/index.d.ts.map +1 -0
- package/dist/camera/index.js +18 -0
- package/dist/camera/index.js.map +1 -0
- package/dist/config/config-types.d.ts +75 -0
- package/dist/config/config-types.d.ts.map +1 -0
- package/dist/config/config-types.generated.d.ts +495 -0
- package/dist/config/config-types.generated.d.ts.map +1 -0
- package/dist/config/config-types.generated.js +2 -0
- package/dist/config/config-types.generated.js.map +1 -0
- package/dist/config/config-types.js +175 -0
- package/dist/config/config-types.js.map +1 -0
- package/dist/config/index.d.ts +20 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +19 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/tjbot-config.d.ts +98 -0
- package/dist/config/tjbot-config.d.ts.map +1 -0
- package/dist/config/tjbot-config.js +309 -0
- package/dist/config/tjbot-config.js.map +1 -0
- package/dist/config/vendor/colors.yaml +61 -0
- package/dist/config/vendor/model-registry.yaml +275 -0
- package/dist/config/vendor/tjbot-config.schema.yaml +792 -0
- package/dist/config/vendor/tjbot.default.toml +452 -0
- package/dist/led/index.d.ts +20 -0
- package/dist/led/index.d.ts.map +1 -0
- package/dist/led/index.js +20 -0
- package/dist/led/index.js.map +1 -0
- package/dist/led/led-common-anode.d.ts +38 -0
- package/dist/led/led-common-anode.d.ts.map +1 -0
- package/dist/led/led-common-anode.js +79 -0
- package/dist/led/led-common-anode.js.map +1 -0
- package/dist/led/led-neopixel-spi.d.ts +60 -0
- package/dist/led/led-neopixel-spi.d.ts.map +1 -0
- package/dist/led/led-neopixel-spi.js +216 -0
- package/dist/led/led-neopixel-spi.js.map +1 -0
- package/dist/led/led-neopixel-ws281x.js +186 -0
- package/dist/led/led-neopixel.d.ts +57 -0
- package/dist/led/led-neopixel.d.ts.map +1 -0
- package/dist/led/led-neopixel.js +235 -0
- package/dist/led/led-neopixel.js.map +1 -0
- package/dist/microphone/index.d.ts +18 -0
- package/dist/microphone/index.d.ts.map +1 -0
- package/dist/microphone/index.js +18 -0
- package/dist/microphone/index.js.map +1 -0
- package/dist/microphone/microphone.d.ts +65 -0
- package/dist/microphone/microphone.d.ts.map +1 -0
- package/dist/microphone/microphone.js +179 -0
- package/dist/microphone/microphone.js.map +1 -0
- package/dist/rpi-drivers/index.d.ts +22 -0
- package/dist/rpi-drivers/index.d.ts.map +1 -0
- package/dist/rpi-drivers/index.js +22 -0
- package/dist/rpi-drivers/index.js.map +1 -0
- package/dist/rpi-drivers/rpi-detect.d.ts +24 -0
- package/dist/rpi-drivers/rpi-detect.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi-detect.js +49 -0
- package/dist/rpi-drivers/rpi-detect.js.map +1 -0
- package/dist/rpi-drivers/rpi-driver.d.ts +116 -0
- package/dist/rpi-drivers/rpi-driver.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi-driver.js +261 -0
- package/dist/rpi-drivers/rpi-driver.js.map +1 -0
- package/dist/rpi-drivers/rpi3-driver.d.ts +47 -0
- package/dist/rpi-drivers/rpi3-driver.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi3-driver.js +145 -0
- package/dist/rpi-drivers/rpi3-driver.js.map +1 -0
- package/dist/rpi-drivers/rpi4-driver.d.ts +35 -0
- package/dist/rpi-drivers/rpi4-driver.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi4-driver.js +101 -0
- package/dist/rpi-drivers/rpi4-driver.js.map +1 -0
- package/dist/rpi-drivers/rpi5-driver.d.ts +33 -0
- package/dist/rpi-drivers/rpi5-driver.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi5-driver.js +78 -0
- package/dist/rpi-drivers/rpi5-driver.js.map +1 -0
- package/dist/servo/index.d.ts +19 -0
- package/dist/servo/index.d.ts.map +1 -0
- package/dist/servo/index.js +19 -0
- package/dist/servo/index.js.map +1 -0
- package/dist/servo/servo-constants.d.ts +33 -0
- package/dist/servo/servo-constants.d.ts.map +1 -0
- package/dist/servo/servo-constants.js +34 -0
- package/dist/servo/servo-constants.js.map +1 -0
- package/dist/servo/servo-lgpio.d.ts +82 -0
- package/dist/servo/servo-lgpio.d.ts.map +1 -0
- package/dist/servo/servo-lgpio.js +178 -0
- package/dist/servo/servo-lgpio.js.map +1 -0
- package/dist/speaker/audio-player.d.ts +30 -0
- package/dist/speaker/audio-player.d.ts.map +1 -0
- package/dist/speaker/audio-player.js +68 -0
- package/dist/speaker/audio-player.js.map +1 -0
- package/dist/speaker/index.d.ts +18 -0
- package/dist/speaker/index.d.ts.map +1 -0
- package/dist/speaker/index.js +18 -0
- package/dist/speaker/index.js.map +1 -0
- package/dist/speaker/speaker.d.ts +53 -0
- package/dist/speaker/speaker.d.ts.map +1 -0
- package/dist/speaker/speaker.js +125 -0
- package/dist/speaker/speaker.js.map +1 -0
- package/dist/stt/backends/azure-stt.d.ts +32 -0
- package/dist/stt/backends/azure-stt.d.ts.map +1 -0
- package/dist/stt/backends/azure-stt.js +227 -0
- package/dist/stt/backends/azure-stt.js.map +1 -0
- package/dist/stt/backends/google-cloud-stt.d.ts +31 -0
- package/dist/stt/backends/google-cloud-stt.d.ts.map +1 -0
- package/dist/stt/backends/google-cloud-stt.js +371 -0
- package/dist/stt/backends/google-cloud-stt.js.map +1 -0
- package/dist/stt/backends/ibm-watson-stt.d.ts +32 -0
- package/dist/stt/backends/ibm-watson-stt.d.ts.map +1 -0
- package/dist/stt/backends/ibm-watson-stt.js +190 -0
- package/dist/stt/backends/ibm-watson-stt.js.map +1 -0
- package/dist/stt/backends/sherpa-onnx-stt.d.ts +117 -0
- package/dist/stt/backends/sherpa-onnx-stt.d.ts.map +1 -0
- package/dist/stt/backends/sherpa-onnx-stt.js +694 -0
- package/dist/stt/backends/sherpa-onnx-stt.js.map +1 -0
- package/dist/stt/index.d.ts +20 -0
- package/dist/stt/index.d.ts.map +1 -0
- package/dist/stt/index.js +21 -0
- package/dist/stt/index.js.map +1 -0
- package/dist/stt/stt-engine.d.ts +68 -0
- package/dist/stt/stt-engine.d.ts.map +1 -0
- package/dist/stt/stt-engine.js +99 -0
- package/dist/stt/stt-engine.js.map +1 -0
- package/dist/stt/stt-utils.d.ts +36 -0
- package/dist/stt/stt-utils.d.ts.map +1 -0
- package/dist/stt/stt-utils.js +112 -0
- package/dist/stt/stt-utils.js.map +1 -0
- package/dist/stt/stt.d.ts +52 -0
- package/dist/stt/stt.d.ts.map +1 -0
- package/dist/stt/stt.js +100 -0
- package/dist/stt/stt.js.map +1 -0
- package/dist/tjbot.d.ts +317 -0
- package/dist/tjbot.d.ts.map +1 -0
- package/dist/tjbot.js +736 -0
- package/dist/tjbot.js.map +1 -0
- package/dist/tts/backends/azure-tts.d.ts +30 -0
- package/dist/tts/backends/azure-tts.d.ts.map +1 -0
- package/dist/tts/backends/azure-tts.js +92 -0
- package/dist/tts/backends/azure-tts.js.map +1 -0
- package/dist/tts/backends/google-cloud-tts.d.ts +38 -0
- package/dist/tts/backends/google-cloud-tts.d.ts.map +1 -0
- package/dist/tts/backends/google-cloud-tts.js +116 -0
- package/dist/tts/backends/google-cloud-tts.js.map +1 -0
- package/dist/tts/backends/ibm-watson-tts.d.ts +42 -0
- package/dist/tts/backends/ibm-watson-tts.d.ts.map +1 -0
- package/dist/tts/backends/ibm-watson-tts.js +99 -0
- package/dist/tts/backends/ibm-watson-tts.js.map +1 -0
- package/dist/tts/backends/sherpa-onnx-tts.d.ts +80 -0
- package/dist/tts/backends/sherpa-onnx-tts.d.ts.map +1 -0
- package/dist/tts/backends/sherpa-onnx-tts.js +237 -0
- package/dist/tts/backends/sherpa-onnx-tts.js.map +1 -0
- package/dist/tts/index.d.ts +19 -0
- package/dist/tts/index.d.ts.map +1 -0
- package/dist/tts/index.js +20 -0
- package/dist/tts/index.js.map +1 -0
- package/dist/tts/tts-engine.d.ts +67 -0
- package/dist/tts/tts-engine.d.ts.map +1 -0
- package/dist/tts/tts-engine.js +109 -0
- package/dist/tts/tts-engine.js.map +1 -0
- package/dist/tts/tts.d.ts +47 -0
- package/dist/tts/tts.d.ts.map +1 -0
- package/dist/tts/tts.js +101 -0
- package/dist/tts/tts.js.map +1 -0
- package/dist/utils/colors.d.ts +39 -0
- package/dist/utils/colors.d.ts.map +1 -0
- package/dist/utils/colors.js +155 -0
- package/dist/utils/colors.js.map +1 -0
- package/dist/utils/constants.d.ts +41 -0
- package/dist/utils/constants.d.ts.map +1 -0
- package/dist/utils/constants.js +43 -0
- package/dist/utils/constants.js.map +1 -0
- package/dist/utils/credentials.d.ts +43 -0
- package/dist/utils/credentials.d.ts.map +1 -0
- package/dist/utils/credentials.js +121 -0
- package/dist/utils/credentials.js.map +1 -0
- package/dist/utils/errors.d.ts +26 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/errors.js +32 -0
- package/dist/utils/errors.js.map +1 -0
- package/dist/utils/index.d.ts +25 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +23 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/logging.d.ts +44 -0
- package/dist/utils/logging.d.ts.map +1 -0
- package/dist/utils/logging.js +113 -0
- package/dist/utils/logging.js.map +1 -0
- package/dist/utils/model-registry.d.ts +142 -0
- package/dist/utils/model-registry.d.ts.map +1 -0
- package/dist/utils/model-registry.js +391 -0
- package/dist/utils/model-registry.js.map +1 -0
- package/dist/utils/utils.d.ts +33 -0
- package/dist/utils/utils.d.ts.map +1 -0
- package/dist/utils/utils.js +50 -0
- package/dist/utils/utils.js.map +1 -0
- package/dist/vision/backends/azure-vision.d.ts +33 -0
- package/dist/vision/backends/azure-vision.d.ts.map +1 -0
- package/dist/vision/backends/azure-vision.js +151 -0
- package/dist/vision/backends/azure-vision.js.map +1 -0
- package/dist/vision/backends/google-cloud-vision.d.ts +32 -0
- package/dist/vision/backends/google-cloud-vision.d.ts.map +1 -0
- package/dist/vision/backends/google-cloud-vision.js +193 -0
- package/dist/vision/backends/google-cloud-vision.js.map +1 -0
- package/dist/vision/backends/onnx.d.ts +116 -0
- package/dist/vision/backends/onnx.d.ts.map +1 -0
- package/dist/vision/backends/onnx.js +781 -0
- package/dist/vision/backends/onnx.js.map +1 -0
- package/dist/vision/index.d.ts +19 -0
- package/dist/vision/index.d.ts.map +1 -0
- package/dist/vision/index.js +20 -0
- package/dist/vision/index.js.map +1 -0
- package/dist/vision/vision-engine.d.ts +131 -0
- package/dist/vision/vision-engine.d.ts.map +1 -0
- package/dist/vision/vision-engine.js +97 -0
- package/dist/vision/vision-engine.js.map +1 -0
- package/dist/vision/vision.d.ts +48 -0
- package/dist/vision/vision.d.ts.map +1 -0
- package/dist/vision/vision.js +83 -0
- package/dist/vision/vision.js.map +1 -0
- package/package.json +124 -0
|
@@ -0,0 +1,694 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright 2026-present TJBot Contributors. All Rights Reserved.
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions and
|
|
14
|
+
* limitations under the License.
|
|
15
|
+
*/
|
|
16
|
+
import path from 'path';
|
|
17
|
+
import { ModelRegistry, TJBotError } from '../../utils/index.js';
|
|
18
|
+
import { getLogger } from '../../utils/logging.js';
|
|
19
|
+
import { STTEngine } from '../stt-engine.js';
|
|
20
|
+
const logger = getLogger(import.meta.url);
|
|
21
|
+
// sherpa-onnx-node is loaded lazily via dynamic import() in initialize(), so the module is only required when this backend is used
|
|
22
|
+
let sherpa;
|
|
23
|
+
/**
|
|
24
|
+
* Sherpa-ONNX Speech-to-Text Engine
|
|
25
|
+
*
|
|
26
|
+
* Enhanced local speech recognition using Sherpa-ONNX library with support for:
|
|
27
|
+
* - Multiple model types (Moonshine, Whisper, Zipformer, Paraformer)
|
|
28
|
+
* - Streaming and offline recognition modes
|
|
29
|
+
* - Voice Activity Detection (VAD) for better endpointing
|
|
30
|
+
* - Automatic model download and caching
|
|
31
|
+
*
|
|
32
|
+
* @public
|
|
33
|
+
*/
|
|
34
|
+
export class SherpaONNXSTTEngine extends STTEngine {
|
|
35
|
+
registry = ModelRegistry.getInstance();
|
|
36
|
+
modelInfo;
|
|
37
|
+
modelPaths;
|
|
38
|
+
vadPath;
|
|
39
|
+
vad;
|
|
40
|
+
recognizer;
|
|
41
|
+
async initialize() {
|
|
42
|
+
const config = this.config;
|
|
43
|
+
const vadConfig = config.vad;
|
|
44
|
+
if (!config.model) {
|
|
45
|
+
throw new TJBotError('Sherpa-ONNX STT model not specified. Provide model name in listen.backend.sherpa-onnx config.');
|
|
46
|
+
}
|
|
47
|
+
// Load sherpa-onnx
|
|
48
|
+
if (!sherpa) {
|
|
49
|
+
// Set environment variables to reduce noisy logging
|
|
50
|
+
process.env.SHERPA_ONNX_LOG_LEVEL = 'OFF';
|
|
51
|
+
const module = await import('sherpa-onnx-node');
|
|
52
|
+
// CommonJS module imported as ES module has exports in .default
|
|
53
|
+
sherpa = (module.default || module);
|
|
54
|
+
logger.debug('successfully loaded sherpa-onnx-node module');
|
|
55
|
+
}
|
|
56
|
+
// Load STT model from registry
|
|
57
|
+
const modelName = config.model;
|
|
58
|
+
logger.info(`Loading STT model: ${modelName}`);
|
|
59
|
+
this.modelInfo = await this.registry.loadModel(modelName);
|
|
60
|
+
const modelCacheDir = this.registry.getModelCacheDirForType('stt');
|
|
61
|
+
const modelDir = path.join(modelCacheDir, this.modelInfo.folder);
|
|
62
|
+
this.modelPaths = this.pathsForModelKey(this.modelInfo.key, modelDir);
|
|
63
|
+
// Download VAD model if needed for offline recognition
|
|
64
|
+
if (vadConfig && this.modelInfo) {
|
|
65
|
+
if (this.modelInfo.kind.startsWith('offline') && vadConfig.enabled) {
|
|
66
|
+
const vadModelName = vadConfig.model;
|
|
67
|
+
logger.info(`Loading VAD model: ${vadModelName}`);
|
|
68
|
+
const vadInfo = await this.registry.loadModel(vadModelName);
|
|
69
|
+
const vadCacheDir = this.registry.getModelCacheDirForType('vad');
|
|
70
|
+
this.vadPath = path.join(vadCacheDir, vadInfo.folder, vadInfo.required[0]);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
// Create the STT recognizer and VAD as needed
|
|
74
|
+
await this.setupRecognizer();
|
|
75
|
+
logger.info('Sherpa-ONNX STT engine initialized');
|
|
76
|
+
}
|
|
77
|
+
async transcribe(micStream, options) {
|
|
78
|
+
const config = this.config;
|
|
79
|
+
if (!sherpa || !this.recognizer) {
|
|
80
|
+
throw new TJBotError('Sherpa-ONNX STT service not initialized. Call initialize() first.');
|
|
81
|
+
}
|
|
82
|
+
if (!this.modelInfo) {
|
|
83
|
+
throw new TJBotError('Model info not set. Ensure initialize() was called.');
|
|
84
|
+
}
|
|
85
|
+
logger.verbose(`Transcribing speech with Sherpa-ONNX STT (model=${this.modelInfo.key}, kind=${this.modelInfo.kind})`);
|
|
86
|
+
try {
|
|
87
|
+
this.ensureStream(micStream);
|
|
88
|
+
const inputRate = config.microphoneRate ?? 16000;
|
|
89
|
+
// Route to appropriate transcription method based on model type
|
|
90
|
+
if (this.modelInfo.kind === 'streaming' || this.modelInfo.kind === 'streaming-zipformer') {
|
|
91
|
+
return await this.transcribeStreaming(micStream, inputRate, options);
|
|
92
|
+
}
|
|
93
|
+
else {
|
|
94
|
+
const useVad = this.shouldUseVad();
|
|
95
|
+
return await this.transcribeOffline(micStream, inputRate, useVad, options);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
catch (error) {
|
|
99
|
+
throw new TJBotError('Transcription failed', { cause: error });
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Determine if VAD should be used
|
|
104
|
+
*/
|
|
105
|
+
shouldUseVad() {
|
|
106
|
+
const config = this.config;
|
|
107
|
+
if (!this.modelInfo) {
|
|
108
|
+
throw new TJBotError('Model info not set. Ensure initialize() was called.');
|
|
109
|
+
}
|
|
110
|
+
const vadConfig = config.vad;
|
|
111
|
+
const vadEnabled = vadConfig.enabled ?? true;
|
|
112
|
+
const isOffline = this.modelInfo.kind.startsWith('offline');
|
|
113
|
+
return isOffline && vadEnabled && Boolean(this.vadPath);
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Setup recognizer and VAD based on model configuration
|
|
117
|
+
*/
|
|
118
|
+
async setupRecognizer() {
|
|
119
|
+
if (!this.modelInfo) {
|
|
120
|
+
throw new TJBotError('Model info not set. Ensure initialize() was called.');
|
|
121
|
+
}
|
|
122
|
+
if (!this.modelPaths) {
|
|
123
|
+
throw new TJBotError('Model paths not set. Ensure initialize() was called.');
|
|
124
|
+
}
|
|
125
|
+
// Create recognizer once if not already created (model is constant after initialize())
|
|
126
|
+
if (!this.recognizer) {
|
|
127
|
+
if (this.modelInfo.kind === 'streaming') {
|
|
128
|
+
this.recognizer = this.createOnlineRecognizer(this.modelPaths);
|
|
129
|
+
}
|
|
130
|
+
else if (this.modelInfo.kind === 'streaming-zipformer') {
|
|
131
|
+
this.recognizer = this.createZipformerRecognizer(this.modelPaths);
|
|
132
|
+
}
|
|
133
|
+
else if (this.modelInfo.kind === 'offline-whisper') {
|
|
134
|
+
this.recognizer = this.createWhisperRecognizer(this.modelPaths);
|
|
135
|
+
}
|
|
136
|
+
else {
|
|
137
|
+
this.recognizer = this.createOfflineRecognizer(this.modelPaths);
|
|
138
|
+
}
|
|
139
|
+
logger.debug(`created recognizer for model: ${this.modelInfo.key} (${this.modelInfo.kind})`);
|
|
140
|
+
}
|
|
141
|
+
// Setup VAD if needed
|
|
142
|
+
if (this.vadPath && !this.vad) {
|
|
143
|
+
this.vad = this.createSileroVad(this.vadPath);
|
|
144
|
+
logger.debug('created Silero VAD instance');
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
/**
|
|
148
|
+
* Get the paths for all of the model files for a given model key.
|
|
149
|
+
* @param key The model key (e.g. "moonshine-tiny", "whisper-tiny", "zipformer-en", "paraformer-en")
|
|
150
|
+
* @param baseDir The folder in which the model exists.
|
|
151
|
+
* @returns An STTModelPaths object containing the paths to the model files.
|
|
152
|
+
*/
|
|
153
|
+
pathsForModelKey(key, baseDir) {
|
|
154
|
+
// Moonshine models (both tiny and base)
|
|
155
|
+
if (key.startsWith('moonshine')) {
|
|
156
|
+
return {
|
|
157
|
+
preprocessor: path.join(baseDir, 'preprocess.onnx'),
|
|
158
|
+
encoder: path.join(baseDir, 'encode.int8.onnx'),
|
|
159
|
+
uncachedDecoder: path.join(baseDir, 'uncached_decode.int8.onnx'),
|
|
160
|
+
cachedDecoder: path.join(baseDir, 'cached_decode.int8.onnx'),
|
|
161
|
+
tokens: path.join(baseDir, 'tokens.txt'),
|
|
162
|
+
};
|
|
163
|
+
}
|
|
164
|
+
if (key === 'whisper-tiny') {
|
|
165
|
+
return {
|
|
166
|
+
encoder: path.join(baseDir, 'tiny.en-encoder.int8.onnx'),
|
|
167
|
+
decoder: path.join(baseDir, 'tiny.en-decoder.int8.onnx'),
|
|
168
|
+
tokens: path.join(baseDir, 'tiny.en-tokens.txt'),
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
if (key === 'whisper-base') {
|
|
172
|
+
return {
|
|
173
|
+
encoder: path.join(baseDir, 'base.en-encoder.int8.onnx'),
|
|
174
|
+
decoder: path.join(baseDir, 'base.en-decoder.int8.onnx'),
|
|
175
|
+
tokens: path.join(baseDir, 'base.en-tokens.txt'),
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
if (key === 'zipformer-en') {
|
|
179
|
+
return {
|
|
180
|
+
encoder: path.join(baseDir, 'encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx'),
|
|
181
|
+
decoder: path.join(baseDir, 'decoder-epoch-99-avg-1-chunk-16-left-128.onnx'),
|
|
182
|
+
joiner: path.join(baseDir, 'joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx'),
|
|
183
|
+
tokens: path.join(baseDir, 'tokens.txt'),
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
// Paraformer
|
|
187
|
+
if (key === 'paraformer-en') {
|
|
188
|
+
return {
|
|
189
|
+
encoder: path.join(baseDir, 'encoder.int8.onnx'),
|
|
190
|
+
decoder: path.join(baseDir, 'decoder.int8.onnx'),
|
|
191
|
+
tokens: path.join(baseDir, 'tokens.txt'),
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
throw new TJBotError(`Unsupported model key: ${key}`);
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Extract and validate required paths for Paraformer online recognizer.
|
|
198
|
+
* @throws {TJBotError} if required paths are missing
|
|
199
|
+
*/
|
|
200
|
+
validateParaformerPaths(modelPaths) {
|
|
201
|
+
if (!modelPaths.decoder) {
|
|
202
|
+
throw new TJBotError('Paraformer model requires decoder path');
|
|
203
|
+
}
|
|
204
|
+
return {
|
|
205
|
+
encoder: modelPaths.encoder,
|
|
206
|
+
decoder: modelPaths.decoder,
|
|
207
|
+
};
|
|
208
|
+
}
|
|
209
|
+
/**
|
|
210
|
+
* Extract and validate required paths for Zipformer online recognizer.
|
|
211
|
+
* @throws {TJBotError} if required paths are missing
|
|
212
|
+
*/
|
|
213
|
+
validateZipformerPaths(modelPaths) {
|
|
214
|
+
if (!modelPaths.decoder) {
|
|
215
|
+
throw new TJBotError('Zipformer model requires decoder path');
|
|
216
|
+
}
|
|
217
|
+
if (!modelPaths.joiner) {
|
|
218
|
+
throw new TJBotError('Zipformer model requires joiner path');
|
|
219
|
+
}
|
|
220
|
+
return {
|
|
221
|
+
encoder: modelPaths.encoder,
|
|
222
|
+
decoder: modelPaths.decoder,
|
|
223
|
+
joiner: modelPaths.joiner,
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Extract and validate required paths for Moonshine offline recognizer.
|
|
228
|
+
* @throws {TJBotError} if required paths are missing
|
|
229
|
+
*/
|
|
230
|
+
validateMoonshinePaths(modelPaths) {
|
|
231
|
+
if (!modelPaths.preprocessor) {
|
|
232
|
+
throw new TJBotError('Moonshine model requires preprocessor path');
|
|
233
|
+
}
|
|
234
|
+
if (!modelPaths.uncachedDecoder) {
|
|
235
|
+
throw new TJBotError('Moonshine model requires uncachedDecoder path');
|
|
236
|
+
}
|
|
237
|
+
if (!modelPaths.cachedDecoder) {
|
|
238
|
+
throw new TJBotError('Moonshine model requires cachedDecoder path');
|
|
239
|
+
}
|
|
240
|
+
return {
|
|
241
|
+
preprocessor: modelPaths.preprocessor,
|
|
242
|
+
encoder: modelPaths.encoder,
|
|
243
|
+
uncachedDecoder: modelPaths.uncachedDecoder,
|
|
244
|
+
cachedDecoder: modelPaths.cachedDecoder,
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
/**
|
|
248
|
+
* Extract and validate required paths for Whisper offline recognizer.
|
|
249
|
+
* @throws {TJBotError} if required paths are missing
|
|
250
|
+
*/
|
|
251
|
+
validateWhisperPaths(modelPaths) {
|
|
252
|
+
if (!modelPaths.decoder) {
|
|
253
|
+
throw new TJBotError('Whisper model requires decoder path');
|
|
254
|
+
}
|
|
255
|
+
return {
|
|
256
|
+
encoder: modelPaths.encoder,
|
|
257
|
+
decoder: modelPaths.decoder,
|
|
258
|
+
};
|
|
259
|
+
}
|
|
260
|
+
/**
|
|
261
|
+
* Create online recognizer for streaming Paraformer models
|
|
262
|
+
*/
|
|
263
|
+
createOnlineRecognizer(modelPaths) {
|
|
264
|
+
if (!sherpa) {
|
|
265
|
+
throw new TJBotError('Sherpa-ONNX not initialized');
|
|
266
|
+
}
|
|
267
|
+
const paths = this.validateParaformerPaths(modelPaths);
|
|
268
|
+
const config = {
|
|
269
|
+
featConfig: { sampleRate: 16000, featureDim: 80 },
|
|
270
|
+
modelConfig: {
|
|
271
|
+
paraformer: {
|
|
272
|
+
encoder: paths.encoder,
|
|
273
|
+
decoder: paths.decoder,
|
|
274
|
+
},
|
|
275
|
+
tokens: modelPaths.tokens,
|
|
276
|
+
numThreads: 2,
|
|
277
|
+
provider: 'cpu',
|
|
278
|
+
debug: 0,
|
|
279
|
+
},
|
|
280
|
+
decodingMethod: 'greedy_search',
|
|
281
|
+
maxActivePaths: 4,
|
|
282
|
+
enableEndpoint: true,
|
|
283
|
+
rule1MinTrailingSilence: 2.4,
|
|
284
|
+
rule2MinTrailingSilence: 1.2,
|
|
285
|
+
rule3MinUtteranceLength: 1.2,
|
|
286
|
+
};
|
|
287
|
+
return new sherpa.OnlineRecognizer(config);
|
|
288
|
+
}
|
|
289
|
+
/**
|
|
290
|
+
* Create Zipformer recognizer for streaming transducer models
|
|
291
|
+
*/
|
|
292
|
+
createZipformerRecognizer(modelPaths) {
|
|
293
|
+
if (!sherpa) {
|
|
294
|
+
throw new TJBotError('Sherpa-ONNX not initialized');
|
|
295
|
+
}
|
|
296
|
+
const paths = this.validateZipformerPaths(modelPaths);
|
|
297
|
+
const config = {
|
|
298
|
+
featConfig: { sampleRate: 16000, featureDim: 80 },
|
|
299
|
+
modelConfig: {
|
|
300
|
+
transducer: {
|
|
301
|
+
encoder: paths.encoder,
|
|
302
|
+
decoder: paths.decoder,
|
|
303
|
+
joiner: paths.joiner,
|
|
304
|
+
},
|
|
305
|
+
tokens: modelPaths.tokens,
|
|
306
|
+
numThreads: 2,
|
|
307
|
+
provider: 'cpu',
|
|
308
|
+
debug: 0,
|
|
309
|
+
},
|
|
310
|
+
decodingMethod: 'greedy_search',
|
|
311
|
+
maxActivePaths: 4,
|
|
312
|
+
enableEndpoint: true,
|
|
313
|
+
rule1MinTrailingSilence: 2.4,
|
|
314
|
+
rule2MinTrailingSilence: 1.2,
|
|
315
|
+
rule3MinUtteranceLength: 1.2,
|
|
316
|
+
};
|
|
317
|
+
return new sherpa.OnlineRecognizer(config);
|
|
318
|
+
}
|
|
319
|
+
/**
|
|
320
|
+
* Create offline recognizer for Moonshine models
|
|
321
|
+
*/
|
|
322
|
+
createOfflineRecognizer(modelPaths) {
|
|
323
|
+
if (!sherpa) {
|
|
324
|
+
throw new TJBotError('Sherpa-ONNX not initialized');
|
|
325
|
+
}
|
|
326
|
+
// Verify model files exist
|
|
327
|
+
const paths = this.validateMoonshinePaths(modelPaths);
|
|
328
|
+
const config = {
|
|
329
|
+
featConfig: { sampleRate: 16000, featureDim: 80 },
|
|
330
|
+
modelConfig: {
|
|
331
|
+
moonshine: {
|
|
332
|
+
preprocessor: paths.preprocessor,
|
|
333
|
+
encoder: paths.encoder,
|
|
334
|
+
uncachedDecoder: paths.uncachedDecoder,
|
|
335
|
+
cachedDecoder: paths.cachedDecoder,
|
|
336
|
+
},
|
|
337
|
+
tokens: modelPaths.tokens,
|
|
338
|
+
numThreads: 2,
|
|
339
|
+
provider: 'cpu',
|
|
340
|
+
debug: 0,
|
|
341
|
+
},
|
|
342
|
+
decodingMethod: 'greedy_search',
|
|
343
|
+
};
|
|
344
|
+
logger.debug('creating Moonshine recognizer with config:', JSON.stringify(config, null, 2));
|
|
345
|
+
try {
|
|
346
|
+
const recognizer = new sherpa.OfflineRecognizer(config);
|
|
347
|
+
return recognizer;
|
|
348
|
+
}
|
|
349
|
+
catch (error) {
|
|
350
|
+
logger.error('Failed to create Moonshine recognizer:', error);
|
|
351
|
+
throw new TJBotError(`Failed to create Moonshine recognizer: ${error}`, { cause: error });
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* Create Whisper offline recognizer
|
|
356
|
+
*/
|
|
357
|
+
createWhisperRecognizer(modelPaths) {
|
|
358
|
+
if (!sherpa) {
|
|
359
|
+
throw new TJBotError('Sherpa-ONNX not initialized');
|
|
360
|
+
}
|
|
361
|
+
// Verify model files exist
|
|
362
|
+
const paths = this.validateWhisperPaths(modelPaths);
|
|
363
|
+
const config = {
|
|
364
|
+
featConfig: { sampleRate: 16000, featureDim: 80 },
|
|
365
|
+
modelConfig: {
|
|
366
|
+
whisper: {
|
|
367
|
+
encoder: paths.encoder,
|
|
368
|
+
decoder: paths.decoder,
|
|
369
|
+
},
|
|
370
|
+
tokens: modelPaths.tokens,
|
|
371
|
+
numThreads: 2,
|
|
372
|
+
provider: 'cpu',
|
|
373
|
+
debug: 0,
|
|
374
|
+
},
|
|
375
|
+
decodingMethod: 'greedy_search',
|
|
376
|
+
};
|
|
377
|
+
logger.debug('creating Whisper recognizer with config:', JSON.stringify(config, null, 2));
|
|
378
|
+
try {
|
|
379
|
+
const recognizer = new sherpa.OfflineRecognizer(config);
|
|
380
|
+
logger.debug('Whisper recognizer created successfully');
|
|
381
|
+
return recognizer;
|
|
382
|
+
}
|
|
383
|
+
catch (error) {
|
|
384
|
+
logger.error('Failed to create Whisper recognizer:', error);
|
|
385
|
+
throw new TJBotError(`Failed to create Whisper recognizer: ${error}`, { cause: error });
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
/**
|
|
389
|
+
* Create Silero VAD instance
|
|
390
|
+
*/
|
|
391
|
+
createSileroVad(modelPath) {
|
|
392
|
+
if (!sherpa) {
|
|
393
|
+
throw new TJBotError('Sherpa-ONNX not initialized');
|
|
394
|
+
}
|
|
395
|
+
const config = {
|
|
396
|
+
sileroVad: {
|
|
397
|
+
model: modelPath,
|
|
398
|
+
threshold: 0.5,
|
|
399
|
+
minSpeechDuration: 0.25,
|
|
400
|
+
minSilenceDuration: 0.5,
|
|
401
|
+
windowSize: 512,
|
|
402
|
+
},
|
|
403
|
+
sampleRate: 16000,
|
|
404
|
+
debug: false,
|
|
405
|
+
numThreads: 1,
|
|
406
|
+
};
|
|
407
|
+
const bufferSizeInSeconds = 60;
|
|
408
|
+
logger.debug('creating Silero VAD with config:', JSON.stringify(config, null, 2));
|
|
409
|
+
return new sherpa.Vad(config, bufferSizeInSeconds);
|
|
410
|
+
}
|
|
411
|
+
/**
|
|
412
|
+
* Transcribe using streaming recognition
|
|
413
|
+
*/
|
|
414
|
+
async transcribeStreaming(micStream, sampleRate, options) {
|
|
415
|
+
if (!this.recognizer) {
|
|
416
|
+
throw new TJBotError('Recognizer not initialized. Ensure initialize() was called.');
|
|
417
|
+
}
|
|
418
|
+
return new Promise((resolve, reject) => {
|
|
419
|
+
// For streaming (online) recognizers, narrow type to OnlineRecognizer
|
|
420
|
+
const recognizer = this.recognizer;
|
|
421
|
+
const stream = recognizer.createStream();
|
|
422
|
+
let lastText = '';
|
|
423
|
+
let finalText = '';
|
|
424
|
+
const cleanup = () => {
|
|
425
|
+
micStream.removeAllListeners();
|
|
426
|
+
};
|
|
427
|
+
// Handle abort signal
|
|
428
|
+
if (options.abortSignal) {
|
|
429
|
+
options.abortSignal.addEventListener('abort', () => {
|
|
430
|
+
cleanup();
|
|
431
|
+
resolve(finalText || lastText);
|
|
432
|
+
});
|
|
433
|
+
}
|
|
434
|
+
micStream.on('data', (chunk) => {
|
|
435
|
+
try {
|
|
436
|
+
const samples = this.bufferToFloat32LE(chunk);
|
|
437
|
+
stream.acceptWaveform({ sampleRate, samples });
|
|
438
|
+
while (recognizer.isReady(stream)) {
|
|
439
|
+
recognizer.decode(stream);
|
|
440
|
+
}
|
|
441
|
+
const isEndpoint = recognizer.isEndpoint(stream);
|
|
442
|
+
let text = recognizer.getResult(stream).text.trim().toLowerCase();
|
|
443
|
+
if (isEndpoint) {
|
|
444
|
+
// Add tail padding for better recognition
|
|
445
|
+
const tailPadding = new Float32Array(sampleRate * 1.5);
|
|
446
|
+
stream.acceptWaveform({
|
|
447
|
+
samples: tailPadding,
|
|
448
|
+
sampleRate,
|
|
449
|
+
});
|
|
450
|
+
while (recognizer.isReady(stream)) {
|
|
451
|
+
recognizer.decode(stream);
|
|
452
|
+
}
|
|
453
|
+
text = recognizer.getResult(stream).text.trim().toLowerCase();
|
|
454
|
+
}
|
|
455
|
+
if (text && text !== lastText) {
|
|
456
|
+
lastText = text;
|
|
457
|
+
if (options.onPartialResult) {
|
|
458
|
+
options.onPartialResult(text);
|
|
459
|
+
}
|
|
460
|
+
if (isEndpoint) {
|
|
461
|
+
finalText = text;
|
|
462
|
+
if (options.onFinalResult) {
|
|
463
|
+
options.onFinalResult(text);
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
if (isEndpoint) {
|
|
468
|
+
recognizer.reset(stream);
|
|
469
|
+
cleanup();
|
|
470
|
+
resolve(finalText);
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
catch (error) {
|
|
474
|
+
cleanup();
|
|
475
|
+
reject(new TJBotError('Streaming transcription failed', { cause: error }));
|
|
476
|
+
}
|
|
477
|
+
});
|
|
478
|
+
micStream.on('end', () => {
|
|
479
|
+
cleanup();
|
|
480
|
+
resolve(finalText || lastText);
|
|
481
|
+
});
|
|
482
|
+
micStream.on('error', (error) => {
|
|
483
|
+
cleanup();
|
|
484
|
+
reject(new TJBotError('Microphone stream error', { cause: error }));
|
|
485
|
+
});
|
|
486
|
+
});
|
|
487
|
+
}
|
|
488
|
+
/**
|
|
489
|
+
* Transcribe using offline recognition with optional VAD
|
|
490
|
+
*/
|
|
491
|
+
async transcribeOffline(micStream, sampleRate, useVad, options) {
|
|
492
|
+
if (useVad && this.vadPath) {
|
|
493
|
+
return await this.transcribeOfflineWithVad(micStream, sampleRate, options);
|
|
494
|
+
}
|
|
495
|
+
else {
|
|
496
|
+
return await this.transcribeOfflineEnergy(micStream, sampleRate, options);
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
/**
|
|
500
|
+
* Transcribe offline with Silero VAD
|
|
501
|
+
*/
|
|
502
|
+
async transcribeOfflineWithVad(micStream, sampleRate, options) {
|
|
503
|
+
if (!this.recognizer) {
|
|
504
|
+
throw new TJBotError('Recognizer not initialized');
|
|
505
|
+
}
|
|
506
|
+
if (!this.vadPath) {
|
|
507
|
+
throw new TJBotError('VAD model path not initialized');
|
|
508
|
+
}
|
|
509
|
+
if (!sherpa) {
|
|
510
|
+
throw new TJBotError('Sherpa-ONNX not initialized');
|
|
511
|
+
}
|
|
512
|
+
// Narrow types for use in Promise callbacks
|
|
513
|
+
const recognizer = this.recognizer;
|
|
514
|
+
const vad = this.createSileroVad(this.vadPath);
|
|
515
|
+
const module = sherpa;
|
|
516
|
+
return new Promise((resolve, reject) => {
|
|
517
|
+
const bufferSizeInSeconds = 30;
|
|
518
|
+
const buffer = new module.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
|
|
519
|
+
const transcripts = [];
|
|
520
|
+
const cleanup = () => {
|
|
521
|
+
micStream.removeAllListeners();
|
|
522
|
+
};
|
|
523
|
+
// Handle abort signal
|
|
524
|
+
if (options.abortSignal) {
|
|
525
|
+
options.abortSignal.addEventListener('abort', () => {
|
|
526
|
+
cleanup();
|
|
527
|
+
resolve(transcripts.join(' '));
|
|
528
|
+
});
|
|
529
|
+
}
|
|
530
|
+
micStream.on('data', (chunk) => {
|
|
531
|
+
try {
|
|
532
|
+
const samples = this.bufferToFloat32LE(chunk);
|
|
533
|
+
buffer.push(samples);
|
|
534
|
+
const windowSize = vad.config.sileroVad.windowSize;
|
|
535
|
+
while (buffer.size() > windowSize) {
|
|
536
|
+
const windowSamples = buffer.get(buffer.head(), windowSize);
|
|
537
|
+
buffer.pop(windowSize);
|
|
538
|
+
vad.acceptWaveform(windowSamples);
|
|
539
|
+
}
|
|
540
|
+
while (!vad.isEmpty()) {
|
|
541
|
+
const segment = vad.front();
|
|
542
|
+
vad.pop();
|
|
543
|
+
const stream = recognizer.createStream();
|
|
544
|
+
stream.acceptWaveform({
|
|
545
|
+
samples: segment.samples,
|
|
546
|
+
sampleRate,
|
|
547
|
+
});
|
|
548
|
+
recognizer.decode(stream);
|
|
549
|
+
const result = recognizer.getResult(stream);
|
|
550
|
+
const text = result.text.trim().toLowerCase();
|
|
551
|
+
if (text) {
|
|
552
|
+
transcripts.push(text);
|
|
553
|
+
if (options.onPartialResult) {
|
|
554
|
+
options.onPartialResult(text);
|
|
555
|
+
}
|
|
556
|
+
// Resolve after first complete utterance (single-shot behavior)
|
|
557
|
+
cleanup();
|
|
558
|
+
if (options.onFinalResult) {
|
|
559
|
+
options.onFinalResult(text);
|
|
560
|
+
}
|
|
561
|
+
resolve(text);
|
|
562
|
+
return;
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
catch (error) {
|
|
567
|
+
cleanup();
|
|
568
|
+
reject(new TJBotError('Offline VAD transcription failed', { cause: error }));
|
|
569
|
+
}
|
|
570
|
+
});
|
|
571
|
+
micStream.on('end', () => {
|
|
572
|
+
cleanup();
|
|
573
|
+
const finalText = transcripts.join(' ');
|
|
574
|
+
if (options.onFinalResult) {
|
|
575
|
+
options.onFinalResult(finalText);
|
|
576
|
+
}
|
|
577
|
+
resolve(finalText);
|
|
578
|
+
});
|
|
579
|
+
micStream.on('error', (error) => {
|
|
580
|
+
cleanup();
|
|
581
|
+
reject(new TJBotError('Microphone stream error', { cause: error }));
|
|
582
|
+
});
|
|
583
|
+
});
|
|
584
|
+
}
|
|
585
|
+
/**
|
|
586
|
+
* Transcribe offline with simple energy-based silence detection
|
|
587
|
+
*/
|
|
588
|
+
async transcribeOfflineEnergy(micStream, sampleRate, options) {
|
|
589
|
+
if (!this.recognizer) {
|
|
590
|
+
throw new TJBotError('Recognizer not initialized');
|
|
591
|
+
}
|
|
592
|
+
return new Promise((resolve, reject) => {
|
|
593
|
+
// Narrow recognizer to OfflineRecognizer for offline methods
|
|
594
|
+
const recognizer = this.recognizer;
|
|
595
|
+
const speechChunks = [];
|
|
596
|
+
let silenceMs = 0;
|
|
597
|
+
const silenceLimitMs = 700;
|
|
598
|
+
const rmsThreshold = 1e-4;
|
|
599
|
+
const transcripts = [];
|
|
600
|
+
const cleanup = () => {
|
|
601
|
+
micStream.removeAllListeners();
|
|
602
|
+
};
|
|
603
|
+
// Handle abort signal
|
|
604
|
+
if (options.abortSignal) {
|
|
605
|
+
options.abortSignal.addEventListener('abort', () => {
|
|
606
|
+
cleanup();
|
|
607
|
+
resolve(transcripts.join(' '));
|
|
608
|
+
});
|
|
609
|
+
}
|
|
610
|
+
micStream.on('data', (chunk) => {
|
|
611
|
+
try {
|
|
612
|
+
const samples = this.bufferToFloat32LE(chunk);
|
|
613
|
+
const rms = this.getRMS(samples);
|
|
614
|
+
const durationMs = (samples.length / sampleRate) * 1000;
|
|
615
|
+
if (rms > rmsThreshold) {
|
|
616
|
+
speechChunks.push(samples);
|
|
617
|
+
silenceMs = 0;
|
|
618
|
+
}
|
|
619
|
+
else {
|
|
620
|
+
silenceMs += durationMs;
|
|
621
|
+
}
|
|
622
|
+
if (speechChunks.length > 0 && silenceMs >= silenceLimitMs) {
|
|
623
|
+
// Combine speech chunks
|
|
624
|
+
const total = speechChunks.reduce((acc, arr) => acc + arr.length, 0);
|
|
625
|
+
const combined = new Float32Array(total);
|
|
626
|
+
let offset = 0;
|
|
627
|
+
for (const arr of speechChunks) {
|
|
628
|
+
combined.set(arr, offset);
|
|
629
|
+
offset += arr.length;
|
|
630
|
+
}
|
|
631
|
+
const stream = recognizer.createStream();
|
|
632
|
+
stream.acceptWaveform({ samples: combined, sampleRate });
|
|
633
|
+
recognizer.decode(stream);
|
|
634
|
+
const result = recognizer.getResult(stream);
|
|
635
|
+
const text = result.text.trim().toLowerCase();
|
|
636
|
+
if (text) {
|
|
637
|
+
transcripts.push(text);
|
|
638
|
+
if (options.onPartialResult) {
|
|
639
|
+
options.onPartialResult(text);
|
|
640
|
+
}
|
|
641
|
+
// Resolve after first complete utterance (single-shot behavior)
|
|
642
|
+
cleanup();
|
|
643
|
+
if (options.onFinalResult) {
|
|
644
|
+
options.onFinalResult(text);
|
|
645
|
+
}
|
|
646
|
+
resolve(text);
|
|
647
|
+
return;
|
|
648
|
+
}
|
|
649
|
+
speechChunks.length = 0;
|
|
650
|
+
silenceMs = 0;
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
catch (error) {
|
|
654
|
+
cleanup();
|
|
655
|
+
reject(new TJBotError('Offline energy transcription failed', { cause: error }));
|
|
656
|
+
}
|
|
657
|
+
});
|
|
658
|
+
micStream.on('end', () => {
|
|
659
|
+
cleanup();
|
|
660
|
+
const finalText = transcripts.join(' ');
|
|
661
|
+
if (options.onFinalResult) {
|
|
662
|
+
options.onFinalResult(finalText);
|
|
663
|
+
}
|
|
664
|
+
resolve(finalText);
|
|
665
|
+
});
|
|
666
|
+
micStream.on('error', (error) => {
|
|
667
|
+
cleanup();
|
|
668
|
+
reject(new TJBotError('Microphone stream error', { cause: error }));
|
|
669
|
+
});
|
|
670
|
+
});
|
|
671
|
+
}
|
|
672
|
+
/**
|
|
673
|
+
* Convert Int16 PCM buffer to Float32 samples
|
|
674
|
+
*/
|
|
675
|
+
bufferToFloat32LE(buf) {
|
|
676
|
+
const len = buf.length / 2;
|
|
677
|
+
const out = new Float32Array(len);
|
|
678
|
+
for (let i = 0; i < len; ++i) {
|
|
679
|
+
out[i] = buf.readInt16LE(i * 2) / 32768;
|
|
680
|
+
}
|
|
681
|
+
return out;
|
|
682
|
+
}
|
|
683
|
+
/**
|
|
684
|
+
* Calculate RMS (Root Mean Square) of audio samples
|
|
685
|
+
*/
|
|
686
|
+
getRMS(samples) {
|
|
687
|
+
let sum = 0;
|
|
688
|
+
for (let i = 0; i < samples.length; i++) {
|
|
689
|
+
sum += samples[i] * samples[i];
|
|
690
|
+
}
|
|
691
|
+
return Math.sqrt(sum / samples.length);
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
//# sourceMappingURL=sherpa-onnx-stt.js.map
|