tjbot-ce 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +382 -0
- package/dist/camera/camera.d.ts +62 -0
- package/dist/camera/camera.d.ts.map +1 -0
- package/dist/camera/camera.js +155 -0
- package/dist/camera/camera.js.map +1 -0
- package/dist/camera/index.d.ts +18 -0
- package/dist/camera/index.d.ts.map +1 -0
- package/dist/camera/index.js +18 -0
- package/dist/camera/index.js.map +1 -0
- package/dist/config/config-types.d.ts +75 -0
- package/dist/config/config-types.d.ts.map +1 -0
- package/dist/config/config-types.generated.d.ts +495 -0
- package/dist/config/config-types.generated.d.ts.map +1 -0
- package/dist/config/config-types.generated.js +2 -0
- package/dist/config/config-types.generated.js.map +1 -0
- package/dist/config/config-types.js +175 -0
- package/dist/config/config-types.js.map +1 -0
- package/dist/config/index.d.ts +20 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +19 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/tjbot-config.d.ts +98 -0
- package/dist/config/tjbot-config.d.ts.map +1 -0
- package/dist/config/tjbot-config.js +309 -0
- package/dist/config/tjbot-config.js.map +1 -0
- package/dist/config/vendor/colors.yaml +61 -0
- package/dist/config/vendor/model-registry.yaml +275 -0
- package/dist/config/vendor/tjbot-config.schema.yaml +792 -0
- package/dist/config/vendor/tjbot.default.toml +452 -0
- package/dist/led/index.d.ts +20 -0
- package/dist/led/index.d.ts.map +1 -0
- package/dist/led/index.js +20 -0
- package/dist/led/index.js.map +1 -0
- package/dist/led/led-common-anode.d.ts +38 -0
- package/dist/led/led-common-anode.d.ts.map +1 -0
- package/dist/led/led-common-anode.js +79 -0
- package/dist/led/led-common-anode.js.map +1 -0
- package/dist/led/led-neopixel-spi.d.ts +60 -0
- package/dist/led/led-neopixel-spi.d.ts.map +1 -0
- package/dist/led/led-neopixel-spi.js +216 -0
- package/dist/led/led-neopixel-spi.js.map +1 -0
- package/dist/led/led-neopixel-ws281x.js +186 -0
- package/dist/led/led-neopixel.d.ts +57 -0
- package/dist/led/led-neopixel.d.ts.map +1 -0
- package/dist/led/led-neopixel.js +235 -0
- package/dist/led/led-neopixel.js.map +1 -0
- package/dist/microphone/index.d.ts +18 -0
- package/dist/microphone/index.d.ts.map +1 -0
- package/dist/microphone/index.js +18 -0
- package/dist/microphone/index.js.map +1 -0
- package/dist/microphone/microphone.d.ts +65 -0
- package/dist/microphone/microphone.d.ts.map +1 -0
- package/dist/microphone/microphone.js +179 -0
- package/dist/microphone/microphone.js.map +1 -0
- package/dist/rpi-drivers/index.d.ts +22 -0
- package/dist/rpi-drivers/index.d.ts.map +1 -0
- package/dist/rpi-drivers/index.js +22 -0
- package/dist/rpi-drivers/index.js.map +1 -0
- package/dist/rpi-drivers/rpi-detect.d.ts +24 -0
- package/dist/rpi-drivers/rpi-detect.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi-detect.js +49 -0
- package/dist/rpi-drivers/rpi-detect.js.map +1 -0
- package/dist/rpi-drivers/rpi-driver.d.ts +116 -0
- package/dist/rpi-drivers/rpi-driver.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi-driver.js +261 -0
- package/dist/rpi-drivers/rpi-driver.js.map +1 -0
- package/dist/rpi-drivers/rpi3-driver.d.ts +47 -0
- package/dist/rpi-drivers/rpi3-driver.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi3-driver.js +145 -0
- package/dist/rpi-drivers/rpi3-driver.js.map +1 -0
- package/dist/rpi-drivers/rpi4-driver.d.ts +35 -0
- package/dist/rpi-drivers/rpi4-driver.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi4-driver.js +101 -0
- package/dist/rpi-drivers/rpi4-driver.js.map +1 -0
- package/dist/rpi-drivers/rpi5-driver.d.ts +33 -0
- package/dist/rpi-drivers/rpi5-driver.d.ts.map +1 -0
- package/dist/rpi-drivers/rpi5-driver.js +78 -0
- package/dist/rpi-drivers/rpi5-driver.js.map +1 -0
- package/dist/servo/index.d.ts +19 -0
- package/dist/servo/index.d.ts.map +1 -0
- package/dist/servo/index.js +19 -0
- package/dist/servo/index.js.map +1 -0
- package/dist/servo/servo-constants.d.ts +33 -0
- package/dist/servo/servo-constants.d.ts.map +1 -0
- package/dist/servo/servo-constants.js +34 -0
- package/dist/servo/servo-constants.js.map +1 -0
- package/dist/servo/servo-lgpio.d.ts +82 -0
- package/dist/servo/servo-lgpio.d.ts.map +1 -0
- package/dist/servo/servo-lgpio.js +178 -0
- package/dist/servo/servo-lgpio.js.map +1 -0
- package/dist/speaker/audio-player.d.ts +30 -0
- package/dist/speaker/audio-player.d.ts.map +1 -0
- package/dist/speaker/audio-player.js +68 -0
- package/dist/speaker/audio-player.js.map +1 -0
- package/dist/speaker/index.d.ts +18 -0
- package/dist/speaker/index.d.ts.map +1 -0
- package/dist/speaker/index.js +18 -0
- package/dist/speaker/index.js.map +1 -0
- package/dist/speaker/speaker.d.ts +53 -0
- package/dist/speaker/speaker.d.ts.map +1 -0
- package/dist/speaker/speaker.js +125 -0
- package/dist/speaker/speaker.js.map +1 -0
- package/dist/stt/backends/azure-stt.d.ts +32 -0
- package/dist/stt/backends/azure-stt.d.ts.map +1 -0
- package/dist/stt/backends/azure-stt.js +227 -0
- package/dist/stt/backends/azure-stt.js.map +1 -0
- package/dist/stt/backends/google-cloud-stt.d.ts +31 -0
- package/dist/stt/backends/google-cloud-stt.d.ts.map +1 -0
- package/dist/stt/backends/google-cloud-stt.js +371 -0
- package/dist/stt/backends/google-cloud-stt.js.map +1 -0
- package/dist/stt/backends/ibm-watson-stt.d.ts +32 -0
- package/dist/stt/backends/ibm-watson-stt.d.ts.map +1 -0
- package/dist/stt/backends/ibm-watson-stt.js +190 -0
- package/dist/stt/backends/ibm-watson-stt.js.map +1 -0
- package/dist/stt/backends/sherpa-onnx-stt.d.ts +117 -0
- package/dist/stt/backends/sherpa-onnx-stt.d.ts.map +1 -0
- package/dist/stt/backends/sherpa-onnx-stt.js +694 -0
- package/dist/stt/backends/sherpa-onnx-stt.js.map +1 -0
- package/dist/stt/index.d.ts +20 -0
- package/dist/stt/index.d.ts.map +1 -0
- package/dist/stt/index.js +21 -0
- package/dist/stt/index.js.map +1 -0
- package/dist/stt/stt-engine.d.ts +68 -0
- package/dist/stt/stt-engine.d.ts.map +1 -0
- package/dist/stt/stt-engine.js +99 -0
- package/dist/stt/stt-engine.js.map +1 -0
- package/dist/stt/stt-utils.d.ts +36 -0
- package/dist/stt/stt-utils.d.ts.map +1 -0
- package/dist/stt/stt-utils.js +112 -0
- package/dist/stt/stt-utils.js.map +1 -0
- package/dist/stt/stt.d.ts +52 -0
- package/dist/stt/stt.d.ts.map +1 -0
- package/dist/stt/stt.js +100 -0
- package/dist/stt/stt.js.map +1 -0
- package/dist/tjbot.d.ts +317 -0
- package/dist/tjbot.d.ts.map +1 -0
- package/dist/tjbot.js +736 -0
- package/dist/tjbot.js.map +1 -0
- package/dist/tts/backends/azure-tts.d.ts +30 -0
- package/dist/tts/backends/azure-tts.d.ts.map +1 -0
- package/dist/tts/backends/azure-tts.js +92 -0
- package/dist/tts/backends/azure-tts.js.map +1 -0
- package/dist/tts/backends/google-cloud-tts.d.ts +38 -0
- package/dist/tts/backends/google-cloud-tts.d.ts.map +1 -0
- package/dist/tts/backends/google-cloud-tts.js +116 -0
- package/dist/tts/backends/google-cloud-tts.js.map +1 -0
- package/dist/tts/backends/ibm-watson-tts.d.ts +42 -0
- package/dist/tts/backends/ibm-watson-tts.d.ts.map +1 -0
- package/dist/tts/backends/ibm-watson-tts.js +99 -0
- package/dist/tts/backends/ibm-watson-tts.js.map +1 -0
- package/dist/tts/backends/sherpa-onnx-tts.d.ts +80 -0
- package/dist/tts/backends/sherpa-onnx-tts.d.ts.map +1 -0
- package/dist/tts/backends/sherpa-onnx-tts.js +237 -0
- package/dist/tts/backends/sherpa-onnx-tts.js.map +1 -0
- package/dist/tts/index.d.ts +19 -0
- package/dist/tts/index.d.ts.map +1 -0
- package/dist/tts/index.js +20 -0
- package/dist/tts/index.js.map +1 -0
- package/dist/tts/tts-engine.d.ts +67 -0
- package/dist/tts/tts-engine.d.ts.map +1 -0
- package/dist/tts/tts-engine.js +109 -0
- package/dist/tts/tts-engine.js.map +1 -0
- package/dist/tts/tts.d.ts +47 -0
- package/dist/tts/tts.d.ts.map +1 -0
- package/dist/tts/tts.js +101 -0
- package/dist/tts/tts.js.map +1 -0
- package/dist/utils/colors.d.ts +39 -0
- package/dist/utils/colors.d.ts.map +1 -0
- package/dist/utils/colors.js +155 -0
- package/dist/utils/colors.js.map +1 -0
- package/dist/utils/constants.d.ts +41 -0
- package/dist/utils/constants.d.ts.map +1 -0
- package/dist/utils/constants.js +43 -0
- package/dist/utils/constants.js.map +1 -0
- package/dist/utils/credentials.d.ts +43 -0
- package/dist/utils/credentials.d.ts.map +1 -0
- package/dist/utils/credentials.js +121 -0
- package/dist/utils/credentials.js.map +1 -0
- package/dist/utils/errors.d.ts +26 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/errors.js +32 -0
- package/dist/utils/errors.js.map +1 -0
- package/dist/utils/index.d.ts +25 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +23 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/logging.d.ts +44 -0
- package/dist/utils/logging.d.ts.map +1 -0
- package/dist/utils/logging.js +113 -0
- package/dist/utils/logging.js.map +1 -0
- package/dist/utils/model-registry.d.ts +142 -0
- package/dist/utils/model-registry.d.ts.map +1 -0
- package/dist/utils/model-registry.js +391 -0
- package/dist/utils/model-registry.js.map +1 -0
- package/dist/utils/utils.d.ts +33 -0
- package/dist/utils/utils.d.ts.map +1 -0
- package/dist/utils/utils.js +50 -0
- package/dist/utils/utils.js.map +1 -0
- package/dist/vision/backends/azure-vision.d.ts +33 -0
- package/dist/vision/backends/azure-vision.d.ts.map +1 -0
- package/dist/vision/backends/azure-vision.js +151 -0
- package/dist/vision/backends/azure-vision.js.map +1 -0
- package/dist/vision/backends/google-cloud-vision.d.ts +32 -0
- package/dist/vision/backends/google-cloud-vision.d.ts.map +1 -0
- package/dist/vision/backends/google-cloud-vision.js +193 -0
- package/dist/vision/backends/google-cloud-vision.js.map +1 -0
- package/dist/vision/backends/onnx.d.ts +116 -0
- package/dist/vision/backends/onnx.d.ts.map +1 -0
- package/dist/vision/backends/onnx.js +781 -0
- package/dist/vision/backends/onnx.js.map +1 -0
- package/dist/vision/index.d.ts +19 -0
- package/dist/vision/index.d.ts.map +1 -0
- package/dist/vision/index.js +20 -0
- package/dist/vision/index.js.map +1 -0
- package/dist/vision/vision-engine.d.ts +131 -0
- package/dist/vision/vision-engine.d.ts.map +1 -0
- package/dist/vision/vision-engine.js +97 -0
- package/dist/vision/vision-engine.js.map +1 -0
- package/dist/vision/vision.d.ts +48 -0
- package/dist/vision/vision.d.ts.map +1 -0
- package/dist/vision/vision.js +83 -0
- package/dist/vision/vision.js.map +1 -0
- package/package.json +124 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright 2026-present TJBot Contributors. All Rights Reserved.
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions and
|
|
14
|
+
* limitations under the License.
|
|
15
|
+
*/
|
|
16
|
+
import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
|
|
17
|
+
import { loadAzureCredentials } from '../../utils/credentials.js';
|
|
18
|
+
import { TJBotError } from '../../utils/index.js';
|
|
19
|
+
import { getLogger } from '../../utils/logging.js';
|
|
20
|
+
import { STTEngine } from '../stt-engine.js';
|
|
21
|
+
import { isTimeoutLikeStreamEndReason, resolveTranscriptForStreamEnd } from '../stt-utils.js';
|
|
22
|
+
const logger = getLogger(import.meta.url);
|
|
23
|
+
/**
|
|
24
|
+
* Azure Cognitive Services Speech-to-Text Engine
|
|
25
|
+
*
|
|
26
|
+
* Cloud-based speech recognition using Microsoft Azure Speech Services.
|
|
27
|
+
* Requires Azure subscription key and region to be configured.
|
|
28
|
+
* @public
|
|
29
|
+
*/
|
|
30
|
+
export class AzureSTTEngine extends STTEngine {
    /** Sample rate handed to the Azure PCM wave format; replaced by initialize(). */
    microphoneRate = 44100;
    /** Channel count handed to the Azure PCM wave format; replaced by initialize(). */
    microphoneChannels = 2;
    /** Speech key read from the Azure credentials (`speechKey`). */
    subscriptionKey;
    /** Speech region read from the Azure credentials (`speechRegion`). */
    region;

    /**
     * Load the Azure credentials, validate the configuration and remember the
     * microphone audio format used later by transcribe().
     *
     * @param {number} microphoneRate - Sample rate of the microphone audio.
     * @param {number} microphoneChannels - Number of channels in the microphone audio.
     * @returns {Promise<void>}
     * @throws {TJBotError} When no language is configured, or when the loaded
     *   credentials lack a speech key or region.
     */
    async initialize(microphoneRate, microphoneChannels) {
        const config = this.config;
        const credentials = loadAzureCredentials(config?.credentialsPath);
        this.subscriptionKey = credentials.speechKey;
        this.region = credentials.speechRegion;
        if (!config?.language) {
            throw new TJBotError('Azure STT language not specified. Provide language in listen.backend.azure-stt config.');
        }
        if (!this.subscriptionKey || !this.region) {
            throw new TJBotError('Azure Speech subscription key and region are required.');
        }
        this.microphoneRate = microphoneRate;
        this.microphoneChannels = microphoneChannels;
        logger.info('Azure STT engine initialized');
        // Log the region actually used for the connection (it comes from the
        // credentials), and never the key itself.
        logger.debug(`Initialized AzureSTTEngine with config:
            language: ${config?.language},
            region: ${this.region},
            microphoneRate: ${this.microphoneRate},
            microphoneChannels: ${this.microphoneChannels},
            subscriptionKey: ${this.subscriptionKey ? '***' : 'not set'}
        `);
    }

    /**
     * Transcribe microphone audio with Azure Speech.
     *
     * When `config.interimResults` is falsy a single-shot recognition is run;
     * otherwise continuous recognition is used and partial/final results are
     * reported through the callbacks in `options`.
     *
     * @param {NodeJS.ReadableStream} micStream - Stream of raw microphone audio chunks.
     * @param {object} [options] - Request options; `onPartialResult` and
     *   `onFinalResult` callbacks are invoked when present.
     * @returns {Promise<string>} The recognized text.
     * @throws {TJBotError} When initialize() has not completed successfully, when
     *   no speech is recognized (code `stt.no-speech`), or when Azure cancels or
     *   fails the recognition.
     */
    async transcribe(micStream, options) {
        const config = this.config;
        if (!this.subscriptionKey || !this.region) {
            throw new TJBotError('Azure STT not initialized. Call initialize() first.');
        }
        const interimResults = config?.interimResults ?? false;
        logger.verbose(`Transcribing speech with Azure STT (language=${config?.language})`);

        // Create speech config
        const speechConfig = sdk.SpeechConfig.fromSubscription(this.subscriptionKey, this.region);
        speechConfig.speechRecognitionLanguage = config?.language;

        // Create audio config from a push stream fed by the microphone
        const audioFormat = sdk.AudioStreamFormat.getWaveFormatPCM(this.microphoneRate, 16, this.microphoneChannels);
        const pushStream = sdk.AudioInputStream.createPushStream(audioFormat);
        const audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);

        // Create recognizer
        const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

        const source = this.ensureStream(micStream);
        let audioReleased = false;
        const onData = (chunk) => {
            // Azure SDK expects an ArrayBuffer; copy only the bytes this Buffer views.
            const arrayBuffer = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength);
            pushStream.write(arrayBuffer);
            logger.silly(`piped ${chunk.length} bytes from microphone to Azure STT push stream`);
        };
        // Detach from the microphone and close the push stream exactly once, so
        // audio is not written to the push stream after recognition is over.
        const releaseAudio = () => {
            if (audioReleased) {
                return;
            }
            audioReleased = true;
            source.removeListener('data', onData);
            source.removeListener('end', onEnd);
            pushStream.close();
        };
        const onEnd = () => {
            releaseAudio();
            logger.silly('microphone stream ended, closed Azure STT push stream');
        };
        source.on('data', onData);
        source.on('end', onEnd);

        return interimResults
            ? this.#recognizeContinuously(recognizer, options, releaseAudio)
            : this.#recognizeOnce(recognizer, releaseAudio);
    }

    /** Build the error used whenever Azure reports that nothing was recognized. */
    #noSpeechError() {
        return new TJBotError('Azure STT: No speech could be recognized', {
            code: 'stt.no-speech',
        });
    }

    /** Run one single-shot recognition and settle with its outcome. */
    #recognizeOnce(recognizer, releaseAudio) {
        return new Promise((resolve, reject) => {
            const finish = () => {
                try {
                    recognizer.close();
                }
                finally {
                    releaseAudio();
                }
            };
            recognizer.recognizeOnceAsync((result) => {
                finish();
                if (result.reason === sdk.ResultReason.RecognizedSpeech) {
                    logger.debug(`Azure STT recognized: ${result.text}`);
                    resolve(result.text.trim());
                }
                else if (result.reason === sdk.ResultReason.NoMatch) {
                    reject(this.#noSpeechError());
                }
                else if (result.reason === sdk.ResultReason.Canceled) {
                    const cancellation = sdk.CancellationDetails.fromResult(result);
                    reject(new TJBotError(`Azure STT canceled: ${cancellation.reason} - ${cancellation.errorDetails}`));
                }
                else {
                    reject(new TJBotError(`Azure STT recognition failed with reason: ${result.reason}`));
                }
            }, (error) => {
                finish();
                reject(new TJBotError('Azure STT recognition error', { cause: new Error(error) }));
            });
        });
    }

    /** Run continuous recognition until the first final result, cancel or session stop. */
    #recognizeContinuously(recognizer, options, releaseAudio) {
        return new Promise((resolve, reject) => {
            let settled = false;
            let latestPartialTranscript = '';
            let latestFinalTranscript = '';

            const cleanup = () => {
                const ignore = () => {
                    // no-op after cleanup
                };
                recognizer.recognizing = ignore;
                recognizer.recognized = ignore;
                recognizer.canceled = ignore;
                recognizer.sessionStopped = ignore;
                try {
                    recognizer.close();
                }
                finally {
                    releaseAudio();
                }
            };

            const settleResolve = (text) => {
                if (settled) {
                    return;
                }
                settled = true;
                recognizer.stopContinuousRecognitionAsync(() => {
                    cleanup();
                    resolve(text);
                }, (error) => {
                    cleanup();
                    reject(new TJBotError('Azure STT stop recognition error', { cause: new Error(error) }));
                });
            };

            const settleReject = (error) => {
                if (settled) {
                    return;
                }
                settled = true;
                // The original error is reported whether or not stopping succeeds.
                const fail = () => {
                    cleanup();
                    reject(error);
                };
                recognizer.stopContinuousRecognitionAsync(fail, fail);
            };

            // Every handler ignores events once the outcome is decided: handlers
            // stay installed until the asynchronous stop completes, and a late
            // event must not invoke the caller's callbacks a second time.
            recognizer.recognizing = (_sender, event) => {
                if (settled) {
                    return;
                }
                const text = event.result?.text?.trim();
                if (text) {
                    latestPartialTranscript = text;
                    options?.onPartialResult?.(text);
                }
            };

            recognizer.recognized = (_sender, event) => {
                if (settled) {
                    return;
                }
                if (event.result.reason === sdk.ResultReason.RecognizedSpeech) {
                    const text = event.result.text?.trim();
                    if (text) {
                        latestFinalTranscript = text;
                        logger.debug(`Azure STT recognized: ${text}`);
                        options?.onFinalResult?.(text);
                        settleResolve(text);
                    }
                    return;
                }
                if (event.result.reason === sdk.ResultReason.NoMatch) {
                    settleReject(this.#noSpeechError());
                }
            };

            recognizer.canceled = (_sender, event) => {
                if (settled) {
                    return;
                }
                const cancelReason = `${event.reason} - ${event.errorDetails || ''}`;
                const timeoutLikeEnd = isTimeoutLikeStreamEndReason(cancelReason);
                const fallbackTranscript = resolveTranscriptForStreamEnd({
                    finalTranscript: latestFinalTranscript,
                    partialTranscript: latestPartialTranscript,
                    allowPartialOnTimeoutLikeEnd: true,
                    timeoutLikeEnd,
                });
                if (fallbackTranscript) {
                    logger.debug('Azure STT finalized using partial transcript after cancel event');
                    options?.onFinalResult?.(fallbackTranscript);
                    settleResolve(fallbackTranscript);
                    return;
                }
                if (timeoutLikeEnd) {
                    settleReject(this.#noSpeechError());
                    return;
                }
                settleReject(new TJBotError(`Azure STT canceled: ${event.reason} - ${event.errorDetails}`));
            };

            recognizer.sessionStopped = () => {
                if (settled) {
                    return;
                }
                const fallbackTranscript = resolveTranscriptForStreamEnd({
                    finalTranscript: latestFinalTranscript,
                    partialTranscript: latestPartialTranscript,
                    allowPartialOnTimeoutLikeEnd: true,
                    timeoutLikeEnd: true,
                });
                if (fallbackTranscript) {
                    logger.debug('Azure STT finalized using partial transcript after session stop');
                    options?.onFinalResult?.(fallbackTranscript);
                    settleResolve(fallbackTranscript);
                    return;
                }
                settleReject(this.#noSpeechError());
            };

            recognizer.startContinuousRecognitionAsync(() => {
                logger.silly('Azure STT continuous recognition started');
            }, (error) => {
                settleReject(new TJBotError('Azure STT start recognition error', { cause: new Error(error) }));
            });
        });
    }
}
|
|
227
|
+
//# sourceMappingURL=azure-stt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"azure-stt.js","sourceRoot":"","sources":["../../../src/stt/backends/azure-stt.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,KAAK,GAAG,MAAM,wCAAwC,CAAC;AAE9D,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAqB,MAAM,kBAAkB,CAAC;AAChE,OAAO,EAAE,4BAA4B,EAAE,6BAA6B,EAAE,MAAM,iBAAiB,CAAC;AAE9F,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE1C;;;;;;GAMG;AACH,MAAM,OAAO,cAAe,SAAQ,SAAS;IACjC,cAAc,GAAW,KAAK,CAAC;IAC/B,kBAAkB,GAAW,CAAC,CAAC;IAC/B,eAAe,CAAU;IACzB,MAAM,CAAU;IAExB,KAAK,CAAC,UAAU,CAAC,cAAsB,EAAE,kBAA0B;QAC/D,MAAM,MAAM,GAAG,IAAI,CAAC,MAA+B,CAAC;QACpD,MAAM,WAAW,GAAG,oBAAoB,CAAC,MAAM,EAAE,eAAqC,CAAC,CAAC;QACxF,IAAI,CAAC,eAAe,GAAG,WAAW,CAAC,SAAS,CAAC;QAC7C,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC,YAAY,CAAC;QAEvC,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,CAAC;YACpB,MAAM,IAAI,UAAU,CAChB,wFAAwF,CAC3F,CAAC;QACN,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACxC,MAAM,IAAI,UAAU,CAAC,wDAAwD,CAAC,CAAC;QACnF,CAAC;QAED,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;QAE7C,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;QAC5C,MAAM,CAAC,KAAK,CAAC;wBACG,MAAM,EAAE,QAAQ;sBAClB,MAAM,EAAE,MAAM;8BACN,IAAI,CAAC,cAAc;kCACf,IAAI,CAAC,kBAAkB;+BAC1B,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;SAC9D,CAAC,CAAC;IACP,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,SAAgC,EAAE,OAA0B;QACzE,MAAM,MAAM,GAAG,IAAI,CAAC,MAA+B,CAAC;QAEpD,IAAI,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACxC,MAAM,IAAI,UAAU,CAAC,qDAAqD,CAAC,CAAC;QAChF,CAAC;QAED,MAAM,cAAc,GAAG,MAAM,EAAE,cAAc,IAAI,KAAK,CAAC;QAEvD,MAAM,CAAC,OAAO,CAAC,gDAAgD,MAAM,EAAE,QAAQ,GAAG,CAAC,CAAC;QAEpF,uBAAuB;QACvB,MAAM,YAAY,GAAG,GAAG,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAC1F,YAAY,CAAC,yBAAyB,GAAG,MAAM,EAAE,QAAkB,CAAC;QAEpE,kCAAkC;QAClC,MAAM,WAAW,GAAG,GAAG,CAAC,iBAAiB,CAAC,gBAAgB,CAAC,IAAI,CAAC,cAAc,EAAE,EAAE,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC7G,MAAM
,UAAU,GAAG,GAAG,CAAC,gBAAgB,CAAC,gBAAgB,CAAC,WAAW,CAAC,CAAC;QAEtE,sCAAsC;QACtC,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACtD,yEAAyE;YACzE,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC;YAC9F,UAAU,CAAC,KAAK,CAAC,WAA0B,CAAC,CAAC;YAC7C,MAAM,CAAC,KAAK,CAAC,SAAS,KAAK,CAAC,MAAM,iDAAiD,CAAC,CAAC;QACzF,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;YACxC,UAAU,CAAC,KAAK,EAAE,CAAC;YACnB,MAAM,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,GAAG,CAAC,WAAW,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;QAEhE,oBAAoB;QACpB,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,gBAAgB,CAAC,YAAY,EAAE,WAAW,CAAC,CAAC;QAEvE,IAAI,CAAC,cAAc,EAAE,CAAC;YAClB,OAAO,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC3C,UAAU,CAAC,kBAAkB,CACzB,CAAC,MAAmC,EAAE,EAAE;oBACpC,UAAU,CAAC,KAAK,EAAE,CAAC;oBAEnB,IAAI,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,YAAY,CAAC,gBAAgB,EAAE,CAAC;wBACtD,MAAM,CAAC,KAAK,CAAC,yBAAyB,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;wBACrD,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;oBAChC,CAAC;yBAAM,IAAI,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC;wBACpD,MAAM,CACF,IAAI,UAAU,CAAC,0CAA0C,EAAE;4BACvD,IAAI,EAAE,eAAe;yBACxB,CAAC,CACL,CAAC;oBACN,CAAC;yBAAM,IAAI,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC;wBACrD,MAAM,YAAY,GAAG,GAAG,CAAC,mBAAmB,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;wBAChE,MAAM,CACF,IAAI,UAAU,CACV,uBAAuB,YAAY,CAAC,MAAM,MAAM,YAAY,CAAC,YAAY,EAAE,CAC9E,CACJ,CAAC;oBACN,CAAC;yBAAM,CAAC;wBACJ,MAAM,CAAC,IAAI,UAAU,CAAC,6CAA6C,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;oBACzF,CAAC;gBACL,CAAC,EACD,CAAC,KAAa,EAAE,EAAE;oBACd,UAAU,CAAC,KAAK,EAAE,CAAC;oBACnB,MAAM,CAAC,IAAI,UAAU,CAAC,6BAA6B,EAAE,EAAE,KAAK,EAAE,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvF,CAAC,CACJ,CAAC;YACN,CAAC,CAAC,CAAC;QACP,CAAC;QAED,OAAO,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,IAAI,uBAAuB,GAAG,EAAE,CAAC;YACjC,IAAI,qBAAqB,GAAG,EAAE,CAAC;YAE/B,MAAM,OAAO,GAAG,GAAG,
EAAE;gBACjB,UAAU,CAAC,WAAW,GAAG,GAAG,EAAE;oBAC1B,sBAAsB;gBAC1B,CAAC,CAAC;gBACF,UAAU,CAAC,UAAU,GAAG,GAAG,EAAE;oBACzB,sBAAsB;gBAC1B,CAAC,CAAC;gBACF,UAAU,CAAC,QAAQ,GAAG,GAAG,EAAE;oBACvB,sBAAsB;gBAC1B,CAAC,CAAC;gBACF,UAAU,CAAC,cAAc,GAAG,GAAG,EAAE;oBAC7B,sBAAsB;gBAC1B,CAAC,CAAC;gBACF,UAAU,CAAC,KAAK,EAAE,CAAC;YACvB,CAAC,CAAC;YAEF,MAAM,aAAa,GAAG,CAAC,IAAY,EAAE,EAAE;gBACnC,IAAI,OAAO,EAAE,CAAC;oBACV,OAAO;gBACX,CAAC;gBACD,OAAO,GAAG,IAAI,CAAC;gBACf,UAAU,CAAC,8BAA8B,CACrC,GAAG,EAAE;oBACD,OAAO,EAAE,CAAC;oBACV,OAAO,CAAC,IAAI,CAAC,CAAC;gBAClB,CAAC,EACD,CAAC,KAAa,EAAE,EAAE;oBACd,OAAO,EAAE,CAAC;oBACV,MAAM,CAAC,IAAI,UAAU,CAAC,kCAAkC,EAAE,EAAE,KAAK,EAAE,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC5F,CAAC,CACJ,CAAC;YACN,CAAC,CAAC;YAEF,MAAM,YAAY,GAAG,CAAC,KAAiB,EAAE,EAAE;gBACvC,IAAI,OAAO,EAAE,CAAC;oBACV,OAAO;gBACX,CAAC;gBACD,OAAO,GAAG,IAAI,CAAC;gBACf,UAAU,CAAC,8BAA8B,CACrC,GAAG,EAAE;oBACD,OAAO,EAAE,CAAC;oBACV,MAAM,CAAC,KAAK,CAAC,CAAC;gBAClB,CAAC,EACD,GAAG,EAAE;oBACD,OAAO,EAAE,CAAC;oBACV,MAAM,CAAC,KAAK,CAAC,CAAC;gBAClB,CAAC,CACJ,CAAC;YACN,CAAC,CAAC;YAEF,UAAU,CAAC,WAAW,GAAG,CAAC,OAAuB,EAAE,KAAqC,EAAE,EAAE;gBACxF,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gBACxC,IAAI,IAAI,EAAE,CAAC;oBACP,uBAAuB,GAAG,IAAI,CAAC;oBAC/B,OAAO,CAAC,eAAe,EAAE,CAAC,IAAI,CAAC,CAAC;gBACpC,CAAC;YACL,CAAC,CAAC;YAEF,UAAU,CAAC,UAAU,GAAG,CAAC,OAAuB,EAAE,KAAqC,EAAE,EAAE;gBACvF,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,YAAY,CAAC,gBAAgB,EAAE,CAAC;oBAC5D,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;oBACvC,IAAI,IAAI,EAAE,CAAC;wBACP,qBAAqB,GAAG,IAAI,CAAC;wBAC7B,MAAM,CAAC,KAAK,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC;wBAC9C,OAAO,CAAC,aAAa,EAAE,CAAC,IAAI,CAAC,CAAC;wBAC9B,aAAa,CAAC,IAAI,CAAC,CAAC;oBACxB,CAAC;oBACD,OAAO;gBACX,CAAC;gBAED,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC;oBACnD,YAAY,CACR,IAAI,UAAU,CAAC,0CAA0C,EAAE;wBACvD,IAAI,EAAE,eAAe;qBACxB,CAAC,CACL,CAAC;gBACN,CAAC;YACL,CAAC,CAAC;YAEF,UAAU,CAAC,QAAQ,GAAG,CAAC,OAAuB,EAAE,KAA6C,EAAE,EAAE;gBAC7F,MAAM,YAAY,GAAG,GAAG,
KAAK,CAAC,MAAM,MAAM,KAAK,CAAC,YAAY,IAAI,EAAE,EAAE,CAAC;gBACrE,MAAM,cAAc,GAAG,4BAA4B,CAAC,YAAY,CAAC,CAAC;gBAClE,MAAM,kBAAkB,GAAG,6BAA6B,CAAC;oBACrD,eAAe,EAAE,qBAAqB;oBACtC,iBAAiB,EAAE,uBAAuB;oBAC1C,4BAA4B,EAAE,IAAI;oBAClC,cAAc;iBACjB,CAAC,CAAC;gBAEH,IAAI,kBAAkB,EAAE,CAAC;oBACrB,MAAM,CAAC,KAAK,CAAC,iEAAiE,CAAC,CAAC;oBAChF,OAAO,CAAC,aAAa,EAAE,CAAC,kBAAkB,CAAC,CAAC;oBAC5C,aAAa,CAAC,kBAAkB,CAAC,CAAC;oBAClC,OAAO;gBACX,CAAC;gBAED,IAAI,cAAc,EAAE,CAAC;oBACjB,YAAY,CACR,IAAI,UAAU,CAAC,0CAA0C,EAAE;wBACvD,IAAI,EAAE,eAAe;qBACxB,CAAC,CACL,CAAC;oBACF,OAAO;gBACX,CAAC;gBAED,YAAY,CAAC,IAAI,UAAU,CAAC,uBAAuB,KAAK,CAAC,MAAM,MAAM,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;YAChG,CAAC,CAAC;YAEF,UAAU,CAAC,cAAc,GAAG,GAAG,EAAE;gBAC7B,MAAM,kBAAkB,GAAG,6BAA6B,CAAC;oBACrD,eAAe,EAAE,qBAAqB;oBACtC,iBAAiB,EAAE,uBAAuB;oBAC1C,4BAA4B,EAAE,IAAI;oBAClC,cAAc,EAAE,IAAI;iBACvB,CAAC,CAAC;gBAEH,IAAI,kBAAkB,EAAE,CAAC;oBACrB,MAAM,CAAC,KAAK,CAAC,iEAAiE,CAAC,CAAC;oBAChF,OAAO,CAAC,aAAa,EAAE,CAAC,kBAAkB,CAAC,CAAC;oBAC5C,aAAa,CAAC,kBAAkB,CAAC,CAAC;oBAClC,OAAO;gBACX,CAAC;gBAED,YAAY,CACR,IAAI,UAAU,CAAC,0CAA0C,EAAE;oBACvD,IAAI,EAAE,eAAe;iBACxB,CAAC,CACL,CAAC;YACN,CAAC,CAAC;YAEF,UAAU,CAAC,+BAA+B,CACtC,GAAG,EAAE;gBACD,MAAM,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;YAC7D,CAAC,EACD,CAAC,KAAa,EAAE,EAAE;gBACd,YAAY,CAAC,IAAI,UAAU,CAAC,mCAAmC,EAAE,EAAE,KAAK,EAAE,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;YACnG,CAAC,CACJ,CAAC;QACN,CAAC,CAAC,CAAC;IACP,CAAC;CACJ"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright 2026-present TJBot Contributors. All Rights Reserved.
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions and
|
|
14
|
+
* limitations under the License.
|
|
15
|
+
*/
|
|
16
|
+
import { STTEngine, STTRequestOptions } from '../stt-engine.js';
|
|
17
|
+
/**
|
|
18
|
+
* Google Cloud Speech-to-Text Engine
|
|
19
|
+
*
|
|
20
|
+
* Cloud-based speech recognition using Google Cloud Speech-to-Text API.
|
|
21
|
+
* Requires Google Cloud credentials JSON file to be configured.
|
|
22
|
+
* @public
|
|
23
|
+
*/
|
|
24
|
+
export declare class GoogleCloudSTTEngine extends STTEngine {
|
|
25
|
+
// Private state; the names mirror the parameters of initialize().
private microphoneRate;
|
|
26
|
+
private microphoneChannels;
|
|
27
|
+
// NOTE(review): presumably the Google Cloud Speech client — confirm in google-cloud-stt.js.
private client;
|
|
28
|
+
/**
 * Prepare the engine for the given microphone format.
 * @param microphoneRate - Microphone sample rate.
 * @param microphoneChannels - Number of microphone channels.
 * @returns A promise that resolves with no value.
 */
initialize(microphoneRate: number, microphoneChannels: number): Promise<void>;
|
|
29
|
+
/**
 * Transcribe audio read from a microphone stream.
 * @param micStream - Readable stream supplying the audio.
 * @param options - Per-request options (see STTRequestOptions in stt-engine).
 * @returns A promise resolving to a string — presumably the transcript; confirm in the implementation.
 */
transcribe(micStream: NodeJS.ReadableStream, options: STTRequestOptions): Promise<string>;
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=google-cloud-stt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"google-cloud-stt.d.ts","sourceRoot":"","sources":["../../../src/stt/backends/google-cloud-stt.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAQH,OAAO,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAqFhE;;;;;;GAMG;AACH,qBAAa,oBAAqB,SAAQ,SAAS;IAC/C,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,kBAAkB,CAAa;IACvC,OAAO,CAAC,MAAM,CAAoC;IAE5C,UAAU,CAAC,cAAc,EAAE,MAAM,EAAE,kBAAkB,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA+C7E,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC,cAAc,EAAE,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,MAAM,CAAC;CA0SlG"}
|
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright 2026-present TJBot Contributors. All Rights Reserved.
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions and
|
|
14
|
+
* limitations under the License.
|
|
15
|
+
*/
|
|
16
|
+
import { protos as speechProtos, v2 as speechV2 } from '@google-cloud/speech';
|
|
17
|
+
import { isTimeoutLikeStreamEndReason, resolveTranscriptForStreamEnd } from '../stt-utils.js';
|
|
18
|
+
import { loadGoogleCloudCredentials } from '../../utils/credentials.js';
|
|
19
|
+
import { TJBotError } from '../../utils/index.js';
|
|
20
|
+
import { getLogger } from '../../utils/logging.js';
|
|
21
|
+
import { STTEngine } from '../stt-engine.js';
|
|
22
|
+
// Logger scoped to this module, keyed by the module's own URL.
const logger = getLogger(import.meta.url);

// Allow-list of Google Cloud STT models, each mapped to the regions accepted for it.
// A model/region pair absent from this table is rejected before any request is made.
const SUPPORTED_GOOGLE_STT_MODEL_REGIONS = {
    'chirp_3': ['us', 'eu'],
    'chirp_2': ['us-central1', 'europe-west4', 'asia-southeast1'],
};

// Largest audio payload, in bytes, written to the recognize stream in one message (25 KiB).
// Microphone chunks larger than this are sliced before being sent.
const MAX_GOOGLE_STT_AUDIO_CHUNK_BYTES = 25 * 1024;
|
|
28
|
+
/**
 * Verifies that a model/region pair is listed in SUPPORTED_GOOGLE_STT_MODEL_REGIONS.
 *
 * @param model - Google Cloud STT model identifier (for example "chirp_3").
 * @param region - Region the recognition request will be sent to.
 * @throws TJBotError when the model is not in the table, or when the region is not
 *         listed for that model. The message enumerates the accepted values.
 */
function assertSupportedGoogleSTTModelAndRegion(model, region) {
    // Own-property check only. The `in` operator also matches keys inherited from
    // Object.prototype ("toString", "constructor", "__proto__", ...), which would pass
    // this guard and then fail below with a raw TypeError instead of a TJBotError.
    if (!Object.hasOwn(SUPPORTED_GOOGLE_STT_MODEL_REGIONS, model)) {
        throw new TJBotError(`Google Cloud STT model "${model}" is not supported. Supported models: ${Object.keys(SUPPORTED_GOOGLE_STT_MODEL_REGIONS).join(', ')}`);
    }
    const supportedRegions = SUPPORTED_GOOGLE_STT_MODEL_REGIONS[model];
    if (!supportedRegions.includes(region)) {
        throw new TJBotError(`Google Cloud STT region "${region}" is not supported for model "${model}". Supported regions: ${supportedRegions.join(', ')}`);
    }
}
|
|
37
|
+
/**
 * Converts an error raised while talking to Google Cloud STT into a TJBotError.
 *
 * Three outcomes, checked in this order:
 *  1. status code 3 whose details mention the 25600-byte limit -> 'stt.google-cloud.chunk-too-large'
 *  2. any permission-denied signal (status code 7, reason 'IAM_PERMISSION_DENIED', or the
 *     'speech.recognizers.recognize' permission in the error metadata)
 *     -> 'stt.google-cloud.permission-denied'
 *  3. anything else -> a generic "recognition failed" error without a code
 *
 * @param error - The error as received; kept as `cause` on the returned error.
 * @param recognizerPath - Recognizer resource path the request was addressed to.
 * @returns A TJBotError carrying the recognizer path and the error details as context.
 */
function toGoogleCloudRecognitionError(error, recognizerPath) {
    // Prefer the structured `details` field; fall back to the plain message.
    const details = error.details ?? error.message;

    // The oversized-chunk case takes precedence over every other classification.
    if (error.code === 3 && /maximum of 25600 bytes/i.test(details)) {
        return new TJBotError(`Google Cloud STT rejected an audio chunk over ${MAX_GOOGLE_STT_AUDIO_CHUNK_BYTES} bytes. Audio must be streamed in smaller chunks.`, {
            code: 'stt.google-cloud.chunk-too-large',
            cause: error,
            context: { recognizer: recognizerPath, details },
        });
    }

    const permission = error.errorInfoMetadata?.permission;
    const deniedSignals = [
        error.code === 7,
        error.reason === 'IAM_PERMISSION_DENIED',
        permission === 'speech.recognizers.recognize',
    ];
    if (deniedSignals.some(Boolean)) {
        return new TJBotError(`Google Cloud STT permission denied for recognizer ${recognizerPath}. Ensure the credentials have the permission speech.recognizers.recognize on that recognizer resource and that the selected region matches the recognizer location.`, {
            code: 'stt.google-cloud.permission-denied',
            cause: error,
            context: {
                recognizer: recognizerPath,
                // Report the permission named by the service when present, else the one this backend needs.
                permission: permission ?? 'speech.recognizers.recognize',
                details,
            },
        });
    }

    return new TJBotError('Google Cloud STT recognition failed', {
        cause: error,
        context: { recognizer: recognizerPath, details },
    });
}
|
|
74
|
+
/**
 * Google Cloud Speech-to-Text Engine
 *
 * Cloud-based speech recognition using Google Cloud Speech-to-Text API.
 * Requires Google Cloud credentials JSON file to be configured.
 *
 * Usage: call initialize() once, then transcribe() per utterance. Model, language
 * code and region are read from `this.config` and re-validated on every call.
 * @public
 */
export class GoogleCloudSTTEngine extends STTEngine {
    /** Microphone sample rate in Hz sent as `sampleRateHertz`; replaced by initialize(). */
    microphoneRate = 44100;
    /** Microphone channel count sent as `audioChannelCount`; replaced by initialize(). */
    microphoneChannels = 2;
    /** Speech v2 client; undefined until initialize() has completed. */
    client;
    /**
     * Validates the backend configuration and creates the Speech v2 client bound to the
     * region-specific endpoint (`<region>-speech.googleapis.com`).
     *
     * @param microphoneRate - Sample rate in Hz of the audio that will be streamed.
     * @param microphoneChannels - Number of channels in the audio that will be streamed.
     * @throws TJBotError when model, languageCode or region is missing, or when the
     *         model/region pair is not supported.
     */
    async initialize(microphoneRate, microphoneChannels) {
        const config = this.config;
        const credentials = loadGoogleCloudCredentials(config?.credentialsPath);
        const model = config?.model?.trim();
        const languageCode = config?.languageCode?.trim();
        const region = config?.region?.trim();
        const enableAutomaticPunctuation = config?.enableAutomaticPunctuation ?? true;
        const profanityFilter = config?.profanityFilter ?? true;
        const interimResults = config?.interimResults ?? true;
        if (!model) {
            throw new TJBotError('Google Cloud STT model not specified. Provide model in listen.backend.google-cloud-stt config.');
        }
        if (!languageCode) {
            throw new TJBotError('Google Cloud STT languageCode not specified. Provide languageCode in listen.backend.google-cloud-stt config.');
        }
        if (!region) {
            throw new TJBotError('Google Cloud STT region not specified. Provide region in listen.backend.google-cloud-stt config.');
        }
        assertSupportedGoogleSTTModelAndRegion(model, region);
        // Built only after the region has been validated, so it can never contain "undefined".
        const endpoint = `${region}-speech.googleapis.com`;
        this.microphoneRate = microphoneRate;
        this.microphoneChannels = microphoneChannels;
        this.client = new speechV2.SpeechClient({ apiEndpoint: endpoint });
        logger.info('Google Cloud STT engine initialized');
        logger.debug(`Initialized GoogleCloudSTTEngine with config:
model: ${model},
languageCode: ${languageCode},
region: ${region},
endpoint: ${endpoint},
enableAutomaticPunctuation: ${enableAutomaticPunctuation},
profanityFilter: ${profanityFilter},
interimResults: ${interimResults},
microphoneRate: ${this.microphoneRate},
microphoneChannels: ${this.microphoneChannels},
credentialsPath: ${credentials.credentialsPath}`);
    }
    /**
     * Streams microphone audio to Google Cloud STT and resolves with a transcript.
     *
     * The promise settles exactly once:
     *  - resolves with the first final transcript received;
     *  - otherwise, when the recognize stream errors, ends, closes or reports a non-OK
     *    status, resolves with whatever resolveTranscriptForStreamEnd() selects from the
     *    latest final/partial transcripts, if anything;
     *  - otherwise rejects, with code 'stt.no-speech' when nothing was recognized.
     *
     * When `interimResults` is enabled, `options.onPartialResult` is called for non-final
     * results and `options.onFinalResult` once for the transcript the promise resolves with.
     *
     * @param micStream - Readable stream of raw audio; passed through `this.ensureStream()`.
     * @param options - Request options; `onPartialResult` / `onFinalResult` callbacks are optional.
     * @returns The recognized transcript.
     * @throws TJBotError when the client is not initialized, the configuration is invalid,
     *         the microphone or recognize stream fails, or no speech is recognized.
     */
    async transcribe(micStream, options) {
        const config = this.config;
        const model = config?.model?.trim();
        const languageCode = config?.languageCode?.trim();
        const region = config?.region?.trim();
        const enableAutomaticPunctuation = config?.enableAutomaticPunctuation ?? true;
        const profanityFilter = config?.profanityFilter ?? true;
        const interimResults = config?.interimResults ?? true;
        if (!this.client) {
            throw new TJBotError('Google Cloud STT client not initialized. Call initialize() first.');
        }
        const client = this.client;
        if (!model) {
            throw new TJBotError('Google Cloud STT model not specified. Provide model in listen.backend.google-cloud-stt config.');
        }
        if (!languageCode) {
            throw new TJBotError('Google Cloud STT languageCode not specified. Provide languageCode in listen.backend.google-cloud-stt config.');
        }
        if (!region) {
            throw new TJBotError('Google Cloud STT region not specified. Provide region in listen.backend.google-cloud-stt config.');
        }
        assertSupportedGoogleSTTModelAndRegion(model, region);
        const projectId = await client.getProjectId();
        const recognizerPath = `projects/${projectId}/locations/${region}/recognizers/_`;
        logger.verbose(`Transcribing speech with Google Cloud STT v2 (model=${model}, languageCode=${languageCode}, recognizer=${recognizerPath})`);
        const request = {
            config: {
                explicitDecodingConfig: {
                    encoding: speechProtos.google.cloud.speech.v2.ExplicitDecodingConfig.AudioEncoding.LINEAR16,
                    sampleRateHertz: this.microphoneRate,
                    audioChannelCount: this.microphoneChannels,
                },
                model,
                languageCodes: [languageCode],
                features: {
                    profanityFilter,
                    enableAutomaticPunctuation,
                },
            },
            streamingFeatures: {
                interimResults,
            },
        };
        logger.silly('Google Cloud STT params:', JSON.stringify(request, null, 2));
        const sourceStream = this.ensureStream(micStream);
        return new Promise((resolve, reject) => {
            const recognizeStream = client._streamingRecognize();
            // Guards against settling the promise (and running cleanup) more than once.
            let settled = false;
            // Sticky flag: set once any error/status message looks like a timeout.
            let timeoutLikeStreamEnd = false;
            let latestPartialTranscript = '';
            let latestFinalTranscript = '';
            const settleResolve = (transcript) => {
                if (settled) {
                    return;
                }
                settled = true;
                cleanup();
                resolve(transcript);
            };
            const settleReject = (error) => {
                if (settled) {
                    return;
                }
                settled = true;
                cleanup();
                reject(error);
            };
            const rejectNoSpeech = () => {
                settleReject(new TJBotError('Google Cloud STT: No speech could be recognized', {
                    code: 'stt.no-speech',
                }));
            };
            // Shared by every "stream is over" path: resolve with the best transcript seen
            // so far, if resolveTranscriptForStreamEnd() yields one. Returns true when resolved.
            const resolveWithFallbackTranscript = (debugMessage) => {
                const fallbackTranscript = resolveTranscriptForStreamEnd({
                    finalTranscript: latestFinalTranscript,
                    partialTranscript: latestPartialTranscript,
                    allowPartialOnTimeoutLikeEnd: true,
                    timeoutLikeEnd: timeoutLikeStreamEnd,
                });
                if (!fallbackTranscript) {
                    return false;
                }
                logger.debug(debugMessage);
                if (interimResults) {
                    options.onFinalResult?.(fallbackTranscript);
                }
                settleResolve(fallbackTranscript);
                return true;
            };
            // Handles one streaming response; only the first result's top alternative is used.
            const handleData = (data) => {
                if (!data.results || data.results.length === 0) {
                    return;
                }
                const result = data.results[0];
                if (!result.alternatives || result.alternatives.length === 0) {
                    return;
                }
                const transcript = result.alternatives[0].transcript?.trim();
                if (!transcript) {
                    return;
                }
                if (!result.isFinal) {
                    // Always retain latest non-final transcript so timeout endings can still return best effort text.
                    latestPartialTranscript = transcript;
                    if (interimResults) {
                        options.onPartialResult?.(transcript);
                    }
                    return;
                }
                latestFinalTranscript = transcript;
                logger.debug(`Google Cloud STT recognized: ${transcript}`);
                if (interimResults) {
                    options.onFinalResult?.(transcript);
                }
                settleResolve(transcript);
            };
            // Forwards microphone audio, sliced so no single message exceeds the chunk limit.
            const handleMicData = (chunk) => {
                if (settled) {
                    return;
                }
                const audioChunk = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
                try {
                    for (let offset = 0; offset < audioChunk.length; offset += MAX_GOOGLE_STT_AUDIO_CHUNK_BYTES) {
                        recognizeStream.write({
                            audio: audioChunk.subarray(offset, offset + MAX_GOOGLE_STT_AUDIO_CHUNK_BYTES),
                        });
                    }
                }
                catch (err) {
                    const error = err instanceof Error ? err : new Error(String(err));
                    settleReject(toGoogleCloudRecognitionError(error, recognizerPath));
                }
            };
            const handleMicEnd = () => {
                if (!settled) {
                    recognizeStream.end();
                }
            };
            const handleMicError = (err) => {
                logger.error('Google Cloud STT microphone stream error:', err);
                settleReject(new TJBotError('Google Cloud STT microphone stream failed', { cause: err }));
            };
            const handleError = (err) => {
                if (settled) {
                    // cleanup() leaves this listener attached, so it can still fire after the
                    // promise has settled. Without this guard the fallback path below would
                    // invoke options.onFinalResult a second time and log a spurious error.
                    logger.debug('Google Cloud STT stream error after transcription settled (ignored)', err);
                    return;
                }
                const timeoutLikeReason = isTimeoutLikeStreamEndReason(err.message);
                timeoutLikeStreamEnd = timeoutLikeStreamEnd || timeoutLikeReason;
                if (timeoutLikeReason) {
                    logger.warn(`Google Cloud STT stream reached timeout-like ending: ${err.message}`);
                }
                else {
                    logger.error('Google Cloud STT stream error:', err);
                }
                if (resolveWithFallbackTranscript('Google Cloud STT finalized using partial transcript after stream timeout')) {
                    return;
                }
                if (timeoutLikeStreamEnd) {
                    rejectNoSpeech();
                    return;
                }
                settleReject(toGoogleCloudRecognitionError(err, recognizerPath));
            };
            // Runs on 'end' or 'close' of the recognize stream when nothing has settled yet.
            const handleEndWithoutTranscript = () => {
                if (resolveWithFallbackTranscript('Google Cloud STT finalized using partial transcript after stream end')) {
                    return;
                }
                rejectNoSpeech();
            };
            const handleStatus = (status) => {
                // Code 0 (or no code) is treated as success; stream end handles that case.
                if (status.code === 0 || status.code === undefined) {
                    return;
                }
                timeoutLikeStreamEnd = timeoutLikeStreamEnd || isTimeoutLikeStreamEndReason(status.details);
                if (resolveWithFallbackTranscript('Google Cloud STT finalized using partial transcript from gRPC status')) {
                    return;
                }
                if (timeoutLikeStreamEnd) {
                    rejectNoSpeech();
                    return;
                }
                settleReject(new TJBotError(`Google Cloud STT recognition failed: ${status.details || 'unknown error'}`, {
                    cause: new Error(`gRPC status ${String(status.code)}: ${status.details || 'unknown error'}`),
                }));
            };
            // Detaches listeners and tears down the recognize stream. The 'error' listener
            // is intentionally not removed here; handleError ignores events once settled.
            const cleanup = () => {
                sourceStream.removeListener('data', handleMicData);
                sourceStream.removeListener('end', handleMicEnd);
                sourceStream.removeListener('error', handleMicError);
                recognizeStream.removeListener('data', handleData);
                recognizeStream.removeListener('close', handleEndWithoutTranscript);
                recognizeStream.removeListener('end', handleEndWithoutTranscript);
                recognizeStream.removeListener('status', handleStatus);
                try {
                    if (!recognizeStream.destroyed) {
                        recognizeStream.end();
                    }
                }
                catch (err) {
                    logger.debug('recognize stream end failed (likely already closed)', err);
                }
                recognizeStream.destroy();
            };
            recognizeStream.on('data', handleData);
            recognizeStream.on('error', handleError);
            recognizeStream.once('close', handleEndWithoutTranscript);
            recognizeStream.once('end', handleEndWithoutTranscript);
            recognizeStream.on('status', handleStatus);
            // The first message on the stream carries the recognizer and streaming config;
            // audio messages follow from handleMicData.
            try {
                recognizeStream.write({
                    recognizer: recognizerPath,
                    streamingConfig: request,
                });
            }
            catch (err) {
                const error = err instanceof Error ? err : new Error(String(err));
                settleReject(toGoogleCloudRecognitionError(error, recognizerPath));
                return;
            }
            sourceStream.on('data', handleMicData);
            sourceStream.once('end', handleMicEnd);
            sourceStream.once('error', handleMicError);
        });
    }
}
|
|
371
|
+
//# sourceMappingURL=google-cloud-stt.js.map
|