tjbot-ce 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +382 -0
  3. package/dist/camera/camera.d.ts +62 -0
  4. package/dist/camera/camera.d.ts.map +1 -0
  5. package/dist/camera/camera.js +155 -0
  6. package/dist/camera/camera.js.map +1 -0
  7. package/dist/camera/index.d.ts +18 -0
  8. package/dist/camera/index.d.ts.map +1 -0
  9. package/dist/camera/index.js +18 -0
  10. package/dist/camera/index.js.map +1 -0
  11. package/dist/config/config-types.d.ts +75 -0
  12. package/dist/config/config-types.d.ts.map +1 -0
  13. package/dist/config/config-types.generated.d.ts +495 -0
  14. package/dist/config/config-types.generated.d.ts.map +1 -0
  15. package/dist/config/config-types.generated.js +2 -0
  16. package/dist/config/config-types.generated.js.map +1 -0
  17. package/dist/config/config-types.js +175 -0
  18. package/dist/config/config-types.js.map +1 -0
  19. package/dist/config/index.d.ts +20 -0
  20. package/dist/config/index.d.ts.map +1 -0
  21. package/dist/config/index.js +19 -0
  22. package/dist/config/index.js.map +1 -0
  23. package/dist/config/tjbot-config.d.ts +98 -0
  24. package/dist/config/tjbot-config.d.ts.map +1 -0
  25. package/dist/config/tjbot-config.js +309 -0
  26. package/dist/config/tjbot-config.js.map +1 -0
  27. package/dist/config/vendor/colors.yaml +61 -0
  28. package/dist/config/vendor/model-registry.yaml +275 -0
  29. package/dist/config/vendor/tjbot-config.schema.yaml +792 -0
  30. package/dist/config/vendor/tjbot.default.toml +452 -0
  31. package/dist/led/index.d.ts +20 -0
  32. package/dist/led/index.d.ts.map +1 -0
  33. package/dist/led/index.js +20 -0
  34. package/dist/led/index.js.map +1 -0
  35. package/dist/led/led-common-anode.d.ts +38 -0
  36. package/dist/led/led-common-anode.d.ts.map +1 -0
  37. package/dist/led/led-common-anode.js +79 -0
  38. package/dist/led/led-common-anode.js.map +1 -0
  39. package/dist/led/led-neopixel-spi.d.ts +60 -0
  40. package/dist/led/led-neopixel-spi.d.ts.map +1 -0
  41. package/dist/led/led-neopixel-spi.js +216 -0
  42. package/dist/led/led-neopixel-spi.js.map +1 -0
  43. package/dist/led/led-neopixel-ws281x.js +186 -0
  44. package/dist/led/led-neopixel.d.ts +57 -0
  45. package/dist/led/led-neopixel.d.ts.map +1 -0
  46. package/dist/led/led-neopixel.js +235 -0
  47. package/dist/led/led-neopixel.js.map +1 -0
  48. package/dist/microphone/index.d.ts +18 -0
  49. package/dist/microphone/index.d.ts.map +1 -0
  50. package/dist/microphone/index.js +18 -0
  51. package/dist/microphone/index.js.map +1 -0
  52. package/dist/microphone/microphone.d.ts +65 -0
  53. package/dist/microphone/microphone.d.ts.map +1 -0
  54. package/dist/microphone/microphone.js +179 -0
  55. package/dist/microphone/microphone.js.map +1 -0
  56. package/dist/rpi-drivers/index.d.ts +22 -0
  57. package/dist/rpi-drivers/index.d.ts.map +1 -0
  58. package/dist/rpi-drivers/index.js +22 -0
  59. package/dist/rpi-drivers/index.js.map +1 -0
  60. package/dist/rpi-drivers/rpi-detect.d.ts +24 -0
  61. package/dist/rpi-drivers/rpi-detect.d.ts.map +1 -0
  62. package/dist/rpi-drivers/rpi-detect.js +49 -0
  63. package/dist/rpi-drivers/rpi-detect.js.map +1 -0
  64. package/dist/rpi-drivers/rpi-driver.d.ts +116 -0
  65. package/dist/rpi-drivers/rpi-driver.d.ts.map +1 -0
  66. package/dist/rpi-drivers/rpi-driver.js +261 -0
  67. package/dist/rpi-drivers/rpi-driver.js.map +1 -0
  68. package/dist/rpi-drivers/rpi3-driver.d.ts +47 -0
  69. package/dist/rpi-drivers/rpi3-driver.d.ts.map +1 -0
  70. package/dist/rpi-drivers/rpi3-driver.js +145 -0
  71. package/dist/rpi-drivers/rpi3-driver.js.map +1 -0
  72. package/dist/rpi-drivers/rpi4-driver.d.ts +35 -0
  73. package/dist/rpi-drivers/rpi4-driver.d.ts.map +1 -0
  74. package/dist/rpi-drivers/rpi4-driver.js +101 -0
  75. package/dist/rpi-drivers/rpi4-driver.js.map +1 -0
  76. package/dist/rpi-drivers/rpi5-driver.d.ts +33 -0
  77. package/dist/rpi-drivers/rpi5-driver.d.ts.map +1 -0
  78. package/dist/rpi-drivers/rpi5-driver.js +78 -0
  79. package/dist/rpi-drivers/rpi5-driver.js.map +1 -0
  80. package/dist/servo/index.d.ts +19 -0
  81. package/dist/servo/index.d.ts.map +1 -0
  82. package/dist/servo/index.js +19 -0
  83. package/dist/servo/index.js.map +1 -0
  84. package/dist/servo/servo-constants.d.ts +33 -0
  85. package/dist/servo/servo-constants.d.ts.map +1 -0
  86. package/dist/servo/servo-constants.js +34 -0
  87. package/dist/servo/servo-constants.js.map +1 -0
  88. package/dist/servo/servo-lgpio.d.ts +82 -0
  89. package/dist/servo/servo-lgpio.d.ts.map +1 -0
  90. package/dist/servo/servo-lgpio.js +178 -0
  91. package/dist/servo/servo-lgpio.js.map +1 -0
  92. package/dist/speaker/audio-player.d.ts +30 -0
  93. package/dist/speaker/audio-player.d.ts.map +1 -0
  94. package/dist/speaker/audio-player.js +68 -0
  95. package/dist/speaker/audio-player.js.map +1 -0
  96. package/dist/speaker/index.d.ts +18 -0
  97. package/dist/speaker/index.d.ts.map +1 -0
  98. package/dist/speaker/index.js +18 -0
  99. package/dist/speaker/index.js.map +1 -0
  100. package/dist/speaker/speaker.d.ts +53 -0
  101. package/dist/speaker/speaker.d.ts.map +1 -0
  102. package/dist/speaker/speaker.js +125 -0
  103. package/dist/speaker/speaker.js.map +1 -0
  104. package/dist/stt/backends/azure-stt.d.ts +32 -0
  105. package/dist/stt/backends/azure-stt.d.ts.map +1 -0
  106. package/dist/stt/backends/azure-stt.js +227 -0
  107. package/dist/stt/backends/azure-stt.js.map +1 -0
  108. package/dist/stt/backends/google-cloud-stt.d.ts +31 -0
  109. package/dist/stt/backends/google-cloud-stt.d.ts.map +1 -0
  110. package/dist/stt/backends/google-cloud-stt.js +371 -0
  111. package/dist/stt/backends/google-cloud-stt.js.map +1 -0
  112. package/dist/stt/backends/ibm-watson-stt.d.ts +32 -0
  113. package/dist/stt/backends/ibm-watson-stt.d.ts.map +1 -0
  114. package/dist/stt/backends/ibm-watson-stt.js +190 -0
  115. package/dist/stt/backends/ibm-watson-stt.js.map +1 -0
  116. package/dist/stt/backends/sherpa-onnx-stt.d.ts +117 -0
  117. package/dist/stt/backends/sherpa-onnx-stt.d.ts.map +1 -0
  118. package/dist/stt/backends/sherpa-onnx-stt.js +694 -0
  119. package/dist/stt/backends/sherpa-onnx-stt.js.map +1 -0
  120. package/dist/stt/index.d.ts +20 -0
  121. package/dist/stt/index.d.ts.map +1 -0
  122. package/dist/stt/index.js +21 -0
  123. package/dist/stt/index.js.map +1 -0
  124. package/dist/stt/stt-engine.d.ts +68 -0
  125. package/dist/stt/stt-engine.d.ts.map +1 -0
  126. package/dist/stt/stt-engine.js +99 -0
  127. package/dist/stt/stt-engine.js.map +1 -0
  128. package/dist/stt/stt-utils.d.ts +36 -0
  129. package/dist/stt/stt-utils.d.ts.map +1 -0
  130. package/dist/stt/stt-utils.js +112 -0
  131. package/dist/stt/stt-utils.js.map +1 -0
  132. package/dist/stt/stt.d.ts +52 -0
  133. package/dist/stt/stt.d.ts.map +1 -0
  134. package/dist/stt/stt.js +100 -0
  135. package/dist/stt/stt.js.map +1 -0
  136. package/dist/tjbot.d.ts +317 -0
  137. package/dist/tjbot.d.ts.map +1 -0
  138. package/dist/tjbot.js +736 -0
  139. package/dist/tjbot.js.map +1 -0
  140. package/dist/tts/backends/azure-tts.d.ts +30 -0
  141. package/dist/tts/backends/azure-tts.d.ts.map +1 -0
  142. package/dist/tts/backends/azure-tts.js +92 -0
  143. package/dist/tts/backends/azure-tts.js.map +1 -0
  144. package/dist/tts/backends/google-cloud-tts.d.ts +38 -0
  145. package/dist/tts/backends/google-cloud-tts.d.ts.map +1 -0
  146. package/dist/tts/backends/google-cloud-tts.js +116 -0
  147. package/dist/tts/backends/google-cloud-tts.js.map +1 -0
  148. package/dist/tts/backends/ibm-watson-tts.d.ts +42 -0
  149. package/dist/tts/backends/ibm-watson-tts.d.ts.map +1 -0
  150. package/dist/tts/backends/ibm-watson-tts.js +99 -0
  151. package/dist/tts/backends/ibm-watson-tts.js.map +1 -0
  152. package/dist/tts/backends/sherpa-onnx-tts.d.ts +80 -0
  153. package/dist/tts/backends/sherpa-onnx-tts.d.ts.map +1 -0
  154. package/dist/tts/backends/sherpa-onnx-tts.js +237 -0
  155. package/dist/tts/backends/sherpa-onnx-tts.js.map +1 -0
  156. package/dist/tts/index.d.ts +19 -0
  157. package/dist/tts/index.d.ts.map +1 -0
  158. package/dist/tts/index.js +20 -0
  159. package/dist/tts/index.js.map +1 -0
  160. package/dist/tts/tts-engine.d.ts +67 -0
  161. package/dist/tts/tts-engine.d.ts.map +1 -0
  162. package/dist/tts/tts-engine.js +109 -0
  163. package/dist/tts/tts-engine.js.map +1 -0
  164. package/dist/tts/tts.d.ts +47 -0
  165. package/dist/tts/tts.d.ts.map +1 -0
  166. package/dist/tts/tts.js +101 -0
  167. package/dist/tts/tts.js.map +1 -0
  168. package/dist/utils/colors.d.ts +39 -0
  169. package/dist/utils/colors.d.ts.map +1 -0
  170. package/dist/utils/colors.js +155 -0
  171. package/dist/utils/colors.js.map +1 -0
  172. package/dist/utils/constants.d.ts +41 -0
  173. package/dist/utils/constants.d.ts.map +1 -0
  174. package/dist/utils/constants.js +43 -0
  175. package/dist/utils/constants.js.map +1 -0
  176. package/dist/utils/credentials.d.ts +43 -0
  177. package/dist/utils/credentials.d.ts.map +1 -0
  178. package/dist/utils/credentials.js +121 -0
  179. package/dist/utils/credentials.js.map +1 -0
  180. package/dist/utils/errors.d.ts +26 -0
  181. package/dist/utils/errors.d.ts.map +1 -0
  182. package/dist/utils/errors.js +32 -0
  183. package/dist/utils/errors.js.map +1 -0
  184. package/dist/utils/index.d.ts +25 -0
  185. package/dist/utils/index.d.ts.map +1 -0
  186. package/dist/utils/index.js +23 -0
  187. package/dist/utils/index.js.map +1 -0
  188. package/dist/utils/logging.d.ts +44 -0
  189. package/dist/utils/logging.d.ts.map +1 -0
  190. package/dist/utils/logging.js +113 -0
  191. package/dist/utils/logging.js.map +1 -0
  192. package/dist/utils/model-registry.d.ts +142 -0
  193. package/dist/utils/model-registry.d.ts.map +1 -0
  194. package/dist/utils/model-registry.js +391 -0
  195. package/dist/utils/model-registry.js.map +1 -0
  196. package/dist/utils/utils.d.ts +33 -0
  197. package/dist/utils/utils.d.ts.map +1 -0
  198. package/dist/utils/utils.js +50 -0
  199. package/dist/utils/utils.js.map +1 -0
  200. package/dist/vision/backends/azure-vision.d.ts +33 -0
  201. package/dist/vision/backends/azure-vision.d.ts.map +1 -0
  202. package/dist/vision/backends/azure-vision.js +151 -0
  203. package/dist/vision/backends/azure-vision.js.map +1 -0
  204. package/dist/vision/backends/google-cloud-vision.d.ts +32 -0
  205. package/dist/vision/backends/google-cloud-vision.d.ts.map +1 -0
  206. package/dist/vision/backends/google-cloud-vision.js +193 -0
  207. package/dist/vision/backends/google-cloud-vision.js.map +1 -0
  208. package/dist/vision/backends/onnx.d.ts +116 -0
  209. package/dist/vision/backends/onnx.d.ts.map +1 -0
  210. package/dist/vision/backends/onnx.js +781 -0
  211. package/dist/vision/backends/onnx.js.map +1 -0
  212. package/dist/vision/index.d.ts +19 -0
  213. package/dist/vision/index.d.ts.map +1 -0
  214. package/dist/vision/index.js +20 -0
  215. package/dist/vision/index.js.map +1 -0
  216. package/dist/vision/vision-engine.d.ts +131 -0
  217. package/dist/vision/vision-engine.d.ts.map +1 -0
  218. package/dist/vision/vision-engine.js +97 -0
  219. package/dist/vision/vision-engine.js.map +1 -0
  220. package/dist/vision/vision.d.ts +48 -0
  221. package/dist/vision/vision.d.ts.map +1 -0
  222. package/dist/vision/vision.js +83 -0
  223. package/dist/vision/vision.js.map +1 -0
  224. package/package.json +124 -0
@@ -0,0 +1,694 @@
1
+ /**
2
+ * Copyright 2026-present TJBot Contributors. All Rights Reserved.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+ import path from 'path';
17
+ import { ModelRegistry, TJBotError } from '../../utils/index.js';
18
+ import { getLogger } from '../../utils/logging.js';
19
+ import { STTEngine } from '../stt-engine.js';
20
+ const logger = getLogger(import.meta.url);
21
+ // Lazy require sherpa-onnx to avoid hard dependency issues
22
+ let sherpa;
23
+ /**
24
+ * Sherpa-ONNX Speech-to-Text Engine
25
+ *
26
+ * Enhanced local speech recognition using Sherpa-ONNX library with support for:
27
+ * - Multiple model types (Moonshine, Whisper, Zipformer, Paraformer)
28
+ * - Streaming and offline recognition modes
29
+ * - Voice Activity Detection (VAD) for better endpointing
30
+ * - Automatic model download and caching
31
+ *
32
+ * @public
33
+ */
34
+ export class SherpaONNXSTTEngine extends STTEngine {
35
+ registry = ModelRegistry.getInstance();
36
+ modelInfo;
37
+ modelPaths;
38
+ vadPath;
39
+ vad;
40
+ recognizer;
41
+ async initialize() {
42
+ const config = this.config;
43
+ const vadConfig = config.vad;
44
+ if (!config.model) {
45
+ throw new TJBotError('Sherpa-ONNX STT model not specified. Provide model name in listen.backend.sherpa-onnx config.');
46
+ }
47
+ // Load sherpa-onnx
48
+ if (!sherpa) {
49
+ // Set environment variables to reduce noisy logging
50
+ process.env.SHERPA_ONNX_LOG_LEVEL = 'OFF';
51
+ const module = await import('sherpa-onnx-node');
52
+ // CommonJS module imported as ES module has exports in .default
53
+ sherpa = (module.default || module);
54
+ logger.debug('successfully loaded sherpa-onnx-node module');
55
+ }
56
+ // Load STT model from registry
57
+ const modelName = config.model;
58
+ logger.info(`Loading STT model: ${modelName}`);
59
+ this.modelInfo = await this.registry.loadModel(modelName);
60
+ const modelCacheDir = this.registry.getModelCacheDirForType('stt');
61
+ const modelDir = path.join(modelCacheDir, this.modelInfo.folder);
62
+ this.modelPaths = this.pathsForModelKey(this.modelInfo.key, modelDir);
63
+ // Download VAD model if needed for offline recognition
64
+ if (vadConfig && this.modelInfo) {
65
+ if (this.modelInfo.kind.startsWith('offline') && vadConfig.enabled) {
66
+ const vadModelName = vadConfig.model;
67
+ logger.info(`Loading VAD model: ${vadModelName}`);
68
+ const vadInfo = await this.registry.loadModel(vadModelName);
69
+ const vadCacheDir = this.registry.getModelCacheDirForType('vad');
70
+ this.vadPath = path.join(vadCacheDir, vadInfo.folder, vadInfo.required[0]);
71
+ }
72
+ }
73
+ // Create the STT recognizer and VAD as needed
74
+ await this.setupRecognizer();
75
+ logger.info('Sherpa-ONNX STT engine initialized');
76
+ }
77
+ async transcribe(micStream, options) {
78
+ const config = this.config;
79
+ if (!sherpa || !this.recognizer) {
80
+ throw new TJBotError('Sherpa-ONNX STT service not initialized. Call initialize() first.');
81
+ }
82
+ if (!this.modelInfo) {
83
+ throw new TJBotError('Model info not set. Ensure initialize() was called.');
84
+ }
85
+ logger.verbose(`Transcribing speech with Sherpa-ONNX STT (model=${this.modelInfo.key}, kind=${this.modelInfo.kind})`);
86
+ try {
87
+ this.ensureStream(micStream);
88
+ const inputRate = config.microphoneRate ?? 16000;
89
+ // Route to appropriate transcription method based on model type
90
+ if (this.modelInfo.kind === 'streaming' || this.modelInfo.kind === 'streaming-zipformer') {
91
+ return await this.transcribeStreaming(micStream, inputRate, options);
92
+ }
93
+ else {
94
+ const useVad = this.shouldUseVad();
95
+ return await this.transcribeOffline(micStream, inputRate, useVad, options);
96
+ }
97
+ }
98
+ catch (error) {
99
+ throw new TJBotError('Transcription failed', { cause: error });
100
+ }
101
+ }
102
+ /**
103
+ * Determine if VAD should be used
104
+ */
105
+ shouldUseVad() {
106
+ const config = this.config;
107
+ if (!this.modelInfo) {
108
+ throw new TJBotError('Model info not set. Ensure initialize() was called.');
109
+ }
110
+ const vadConfig = config.vad;
111
+ const vadEnabled = vadConfig.enabled ?? true;
112
+ const isOffline = this.modelInfo.kind.startsWith('offline');
113
+ return isOffline && vadEnabled && Boolean(this.vadPath);
114
+ }
115
+ /**
116
+ * Setup recognizer and VAD based on model configuration
117
+ */
118
+ async setupRecognizer() {
119
+ if (!this.modelInfo) {
120
+ throw new TJBotError('Model info not set. Ensure initialize() was called.');
121
+ }
122
+ if (!this.modelPaths) {
123
+ throw new TJBotError('Model paths not set. Ensure initialize() was called.');
124
+ }
125
+ // Create recognizer once if not already created (model is constant after initialize())
126
+ if (!this.recognizer) {
127
+ if (this.modelInfo.kind === 'streaming') {
128
+ this.recognizer = this.createOnlineRecognizer(this.modelPaths);
129
+ }
130
+ else if (this.modelInfo.kind === 'streaming-zipformer') {
131
+ this.recognizer = this.createZipformerRecognizer(this.modelPaths);
132
+ }
133
+ else if (this.modelInfo.kind === 'offline-whisper') {
134
+ this.recognizer = this.createWhisperRecognizer(this.modelPaths);
135
+ }
136
+ else {
137
+ this.recognizer = this.createOfflineRecognizer(this.modelPaths);
138
+ }
139
+ logger.debug(`created recognizer for model: ${this.modelInfo.key} (${this.modelInfo.kind})`);
140
+ }
141
+ // Setup VAD if needed
142
+ if (this.vadPath && !this.vad) {
143
+ this.vad = this.createSileroVad(this.vadPath);
144
+ logger.debug('created Silero VAD instance');
145
+ }
146
+ }
147
+ /**
148
+ * Get the paths for all of the model files for a given model key.
149
+ * @param key The model key (e.g. "moonshine-tiny", "whisper-tiny", "zipformer-en", "paraformer-en")
150
+ * @param baseDir The folder in which the model exists.
151
+ * @returns An STTModelPaths object containing the paths to the model files.
152
+ */
153
+ pathsForModelKey(key, baseDir) {
154
+ // Moonshine models (both tiny and base)
155
+ if (key.startsWith('moonshine')) {
156
+ return {
157
+ preprocessor: path.join(baseDir, 'preprocess.onnx'),
158
+ encoder: path.join(baseDir, 'encode.int8.onnx'),
159
+ uncachedDecoder: path.join(baseDir, 'uncached_decode.int8.onnx'),
160
+ cachedDecoder: path.join(baseDir, 'cached_decode.int8.onnx'),
161
+ tokens: path.join(baseDir, 'tokens.txt'),
162
+ };
163
+ }
164
+ if (key === 'whisper-tiny') {
165
+ return {
166
+ encoder: path.join(baseDir, 'tiny.en-encoder.int8.onnx'),
167
+ decoder: path.join(baseDir, 'tiny.en-decoder.int8.onnx'),
168
+ tokens: path.join(baseDir, 'tiny.en-tokens.txt'),
169
+ };
170
+ }
171
+ if (key === 'whisper-base') {
172
+ return {
173
+ encoder: path.join(baseDir, 'base.en-encoder.int8.onnx'),
174
+ decoder: path.join(baseDir, 'base.en-decoder.int8.onnx'),
175
+ tokens: path.join(baseDir, 'base.en-tokens.txt'),
176
+ };
177
+ }
178
+ if (key === 'zipformer-en') {
179
+ return {
180
+ encoder: path.join(baseDir, 'encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx'),
181
+ decoder: path.join(baseDir, 'decoder-epoch-99-avg-1-chunk-16-left-128.onnx'),
182
+ joiner: path.join(baseDir, 'joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx'),
183
+ tokens: path.join(baseDir, 'tokens.txt'),
184
+ };
185
+ }
186
+ // Paraformer
187
+ if (key === 'paraformer-en') {
188
+ return {
189
+ encoder: path.join(baseDir, 'encoder.int8.onnx'),
190
+ decoder: path.join(baseDir, 'decoder.int8.onnx'),
191
+ tokens: path.join(baseDir, 'tokens.txt'),
192
+ };
193
+ }
194
+ throw new TJBotError(`Unsupported model key: ${key}`);
195
+ }
196
+ /**
197
+ * Extract and validate required paths for Paraformer online recognizer.
198
+ * @throws {TJBotError} if required paths are missing
199
+ */
200
+ validateParaformerPaths(modelPaths) {
201
+ if (!modelPaths.decoder) {
202
+ throw new TJBotError('Paraformer model requires decoder path');
203
+ }
204
+ return {
205
+ encoder: modelPaths.encoder,
206
+ decoder: modelPaths.decoder,
207
+ };
208
+ }
209
+ /**
210
+ * Extract and validate required paths for Zipformer online recognizer.
211
+ * @throws {TJBotError} if required paths are missing
212
+ */
213
+ validateZipformerPaths(modelPaths) {
214
+ if (!modelPaths.decoder) {
215
+ throw new TJBotError('Zipformer model requires decoder path');
216
+ }
217
+ if (!modelPaths.joiner) {
218
+ throw new TJBotError('Zipformer model requires joiner path');
219
+ }
220
+ return {
221
+ encoder: modelPaths.encoder,
222
+ decoder: modelPaths.decoder,
223
+ joiner: modelPaths.joiner,
224
+ };
225
+ }
226
+ /**
227
+ * Extract and validate required paths for Moonshine offline recognizer.
228
+ * @throws {TJBotError} if required paths are missing
229
+ */
230
+ validateMoonshinePaths(modelPaths) {
231
+ if (!modelPaths.preprocessor) {
232
+ throw new TJBotError('Moonshine model requires preprocessor path');
233
+ }
234
+ if (!modelPaths.uncachedDecoder) {
235
+ throw new TJBotError('Moonshine model requires uncachedDecoder path');
236
+ }
237
+ if (!modelPaths.cachedDecoder) {
238
+ throw new TJBotError('Moonshine model requires cachedDecoder path');
239
+ }
240
+ return {
241
+ preprocessor: modelPaths.preprocessor,
242
+ encoder: modelPaths.encoder,
243
+ uncachedDecoder: modelPaths.uncachedDecoder,
244
+ cachedDecoder: modelPaths.cachedDecoder,
245
+ };
246
+ }
247
+ /**
248
+ * Extract and validate required paths for Whisper offline recognizer.
249
+ * @throws {TJBotError} if required paths are missing
250
+ */
251
+ validateWhisperPaths(modelPaths) {
252
+ if (!modelPaths.decoder) {
253
+ throw new TJBotError('Whisper model requires decoder path');
254
+ }
255
+ return {
256
+ encoder: modelPaths.encoder,
257
+ decoder: modelPaths.decoder,
258
+ };
259
+ }
260
+ /**
261
+ * Create online recognizer for streaming Paraformer models
262
+ */
263
+ createOnlineRecognizer(modelPaths) {
264
+ if (!sherpa) {
265
+ throw new TJBotError('Sherpa-ONNX not initialized');
266
+ }
267
+ const paths = this.validateParaformerPaths(modelPaths);
268
+ const config = {
269
+ featConfig: { sampleRate: 16000, featureDim: 80 },
270
+ modelConfig: {
271
+ paraformer: {
272
+ encoder: paths.encoder,
273
+ decoder: paths.decoder,
274
+ },
275
+ tokens: modelPaths.tokens,
276
+ numThreads: 2,
277
+ provider: 'cpu',
278
+ debug: 0,
279
+ },
280
+ decodingMethod: 'greedy_search',
281
+ maxActivePaths: 4,
282
+ enableEndpoint: true,
283
+ rule1MinTrailingSilence: 2.4,
284
+ rule2MinTrailingSilence: 1.2,
285
+ rule3MinUtteranceLength: 1.2,
286
+ };
287
+ return new sherpa.OnlineRecognizer(config);
288
+ }
289
+ /**
290
+ * Create Zipformer recognizer for streaming transducer models
291
+ */
292
+ createZipformerRecognizer(modelPaths) {
293
+ if (!sherpa) {
294
+ throw new TJBotError('Sherpa-ONNX not initialized');
295
+ }
296
+ const paths = this.validateZipformerPaths(modelPaths);
297
+ const config = {
298
+ featConfig: { sampleRate: 16000, featureDim: 80 },
299
+ modelConfig: {
300
+ transducer: {
301
+ encoder: paths.encoder,
302
+ decoder: paths.decoder,
303
+ joiner: paths.joiner,
304
+ },
305
+ tokens: modelPaths.tokens,
306
+ numThreads: 2,
307
+ provider: 'cpu',
308
+ debug: 0,
309
+ },
310
+ decodingMethod: 'greedy_search',
311
+ maxActivePaths: 4,
312
+ enableEndpoint: true,
313
+ rule1MinTrailingSilence: 2.4,
314
+ rule2MinTrailingSilence: 1.2,
315
+ rule3MinUtteranceLength: 1.2,
316
+ };
317
+ return new sherpa.OnlineRecognizer(config);
318
+ }
319
+ /**
320
+ * Create offline recognizer for Moonshine models
321
+ */
322
+ createOfflineRecognizer(modelPaths) {
323
+ if (!sherpa) {
324
+ throw new TJBotError('Sherpa-ONNX not initialized');
325
+ }
326
+ // Verify model files exist
327
+ const paths = this.validateMoonshinePaths(modelPaths);
328
+ const config = {
329
+ featConfig: { sampleRate: 16000, featureDim: 80 },
330
+ modelConfig: {
331
+ moonshine: {
332
+ preprocessor: paths.preprocessor,
333
+ encoder: paths.encoder,
334
+ uncachedDecoder: paths.uncachedDecoder,
335
+ cachedDecoder: paths.cachedDecoder,
336
+ },
337
+ tokens: modelPaths.tokens,
338
+ numThreads: 2,
339
+ provider: 'cpu',
340
+ debug: 0,
341
+ },
342
+ decodingMethod: 'greedy_search',
343
+ };
344
+ logger.debug('creating Moonshine recognizer with config:', JSON.stringify(config, null, 2));
345
+ try {
346
+ const recognizer = new sherpa.OfflineRecognizer(config);
347
+ return recognizer;
348
+ }
349
+ catch (error) {
350
+ logger.error('Failed to create Moonshine recognizer:', error);
351
+ throw new TJBotError(`Failed to create Moonshine recognizer: ${error}`, { cause: error });
352
+ }
353
+ }
354
+ /**
355
+ * Create Whisper offline recognizer
356
+ */
357
+ createWhisperRecognizer(modelPaths) {
358
+ if (!sherpa) {
359
+ throw new TJBotError('Sherpa-ONNX not initialized');
360
+ }
361
+ // Verify model files exist
362
+ const paths = this.validateWhisperPaths(modelPaths);
363
+ const config = {
364
+ featConfig: { sampleRate: 16000, featureDim: 80 },
365
+ modelConfig: {
366
+ whisper: {
367
+ encoder: paths.encoder,
368
+ decoder: paths.decoder,
369
+ },
370
+ tokens: modelPaths.tokens,
371
+ numThreads: 2,
372
+ provider: 'cpu',
373
+ debug: 0,
374
+ },
375
+ decodingMethod: 'greedy_search',
376
+ };
377
+ logger.debug('creating Whisper recognizer with config:', JSON.stringify(config, null, 2));
378
+ try {
379
+ const recognizer = new sherpa.OfflineRecognizer(config);
380
+ logger.debug('Whisper recognizer created successfully');
381
+ return recognizer;
382
+ }
383
+ catch (error) {
384
+ logger.error('Failed to create Whisper recognizer:', error);
385
+ throw new TJBotError(`Failed to create Whisper recognizer: ${error}`, { cause: error });
386
+ }
387
+ }
388
+ /**
389
+ * Create Silero VAD instance
390
+ */
391
+ createSileroVad(modelPath) {
392
+ if (!sherpa) {
393
+ throw new TJBotError('Sherpa-ONNX not initialized');
394
+ }
395
+ const config = {
396
+ sileroVad: {
397
+ model: modelPath,
398
+ threshold: 0.5,
399
+ minSpeechDuration: 0.25,
400
+ minSilenceDuration: 0.5,
401
+ windowSize: 512,
402
+ },
403
+ sampleRate: 16000,
404
+ debug: false,
405
+ numThreads: 1,
406
+ };
407
+ const bufferSizeInSeconds = 60;
408
+ logger.debug('creating Silero VAD with config:', JSON.stringify(config, null, 2));
409
+ return new sherpa.Vad(config, bufferSizeInSeconds);
410
+ }
411
+ /**
412
+ * Transcribe using streaming recognition
413
+ */
414
+ async transcribeStreaming(micStream, sampleRate, options) {
415
+ if (!this.recognizer) {
416
+ throw new TJBotError('Recognizer not initialized. Ensure initialize() was called.');
417
+ }
418
+ return new Promise((resolve, reject) => {
419
+ // For streaming (online) recognizers, narrow type to OnlineRecognizer
420
+ const recognizer = this.recognizer;
421
+ const stream = recognizer.createStream();
422
+ let lastText = '';
423
+ let finalText = '';
424
+ const cleanup = () => {
425
+ micStream.removeAllListeners();
426
+ };
427
+ // Handle abort signal
428
+ if (options.abortSignal) {
429
+ options.abortSignal.addEventListener('abort', () => {
430
+ cleanup();
431
+ resolve(finalText || lastText);
432
+ });
433
+ }
434
+ micStream.on('data', (chunk) => {
435
+ try {
436
+ const samples = this.bufferToFloat32LE(chunk);
437
+ stream.acceptWaveform({ sampleRate, samples });
438
+ while (recognizer.isReady(stream)) {
439
+ recognizer.decode(stream);
440
+ }
441
+ const isEndpoint = recognizer.isEndpoint(stream);
442
+ let text = recognizer.getResult(stream).text.trim().toLowerCase();
443
+ if (isEndpoint) {
444
+ // Add tail padding for better recognition
445
+ const tailPadding = new Float32Array(sampleRate * 1.5);
446
+ stream.acceptWaveform({
447
+ samples: tailPadding,
448
+ sampleRate,
449
+ });
450
+ while (recognizer.isReady(stream)) {
451
+ recognizer.decode(stream);
452
+ }
453
+ text = recognizer.getResult(stream).text.trim().toLowerCase();
454
+ }
455
+ if (text && text !== lastText) {
456
+ lastText = text;
457
+ if (options.onPartialResult) {
458
+ options.onPartialResult(text);
459
+ }
460
+ if (isEndpoint) {
461
+ finalText = text;
462
+ if (options.onFinalResult) {
463
+ options.onFinalResult(text);
464
+ }
465
+ }
466
+ }
467
+ if (isEndpoint) {
468
+ recognizer.reset(stream);
469
+ cleanup();
470
+ resolve(finalText);
471
+ }
472
+ }
473
+ catch (error) {
474
+ cleanup();
475
+ reject(new TJBotError('Streaming transcription failed', { cause: error }));
476
+ }
477
+ });
478
+ micStream.on('end', () => {
479
+ cleanup();
480
+ resolve(finalText || lastText);
481
+ });
482
+ micStream.on('error', (error) => {
483
+ cleanup();
484
+ reject(new TJBotError('Microphone stream error', { cause: error }));
485
+ });
486
+ });
487
+ }
488
+ /**
489
+ * Transcribe using offline recognition with optional VAD
490
+ */
491
+ async transcribeOffline(micStream, sampleRate, useVad, options) {
492
+ if (useVad && this.vadPath) {
493
+ return await this.transcribeOfflineWithVad(micStream, sampleRate, options);
494
+ }
495
+ else {
496
+ return await this.transcribeOfflineEnergy(micStream, sampleRate, options);
497
+ }
498
+ }
499
+ /**
500
+ * Transcribe offline with Silero VAD
501
+ */
502
+ async transcribeOfflineWithVad(micStream, sampleRate, options) {
503
+ if (!this.recognizer) {
504
+ throw new TJBotError('Recognizer not initialized');
505
+ }
506
+ if (!this.vadPath) {
507
+ throw new TJBotError('VAD model path not initialized');
508
+ }
509
+ if (!sherpa) {
510
+ throw new TJBotError('Sherpa-ONNX not initialized');
511
+ }
512
+ // Narrow types for use in Promise callbacks
513
+ const recognizer = this.recognizer;
514
+ const vad = this.createSileroVad(this.vadPath);
515
+ const module = sherpa;
516
+ return new Promise((resolve, reject) => {
517
+ const bufferSizeInSeconds = 30;
518
+ const buffer = new module.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
519
+ const transcripts = [];
520
+ const cleanup = () => {
521
+ micStream.removeAllListeners();
522
+ };
523
+ // Handle abort signal
524
+ if (options.abortSignal) {
525
+ options.abortSignal.addEventListener('abort', () => {
526
+ cleanup();
527
+ resolve(transcripts.join(' '));
528
+ });
529
+ }
530
+ micStream.on('data', (chunk) => {
531
+ try {
532
+ const samples = this.bufferToFloat32LE(chunk);
533
+ buffer.push(samples);
534
+ const windowSize = vad.config.sileroVad.windowSize;
535
+ while (buffer.size() > windowSize) {
536
+ const windowSamples = buffer.get(buffer.head(), windowSize);
537
+ buffer.pop(windowSize);
538
+ vad.acceptWaveform(windowSamples);
539
+ }
540
+ while (!vad.isEmpty()) {
541
+ const segment = vad.front();
542
+ vad.pop();
543
+ const stream = recognizer.createStream();
544
+ stream.acceptWaveform({
545
+ samples: segment.samples,
546
+ sampleRate,
547
+ });
548
+ recognizer.decode(stream);
549
+ const result = recognizer.getResult(stream);
550
+ const text = result.text.trim().toLowerCase();
551
+ if (text) {
552
+ transcripts.push(text);
553
+ if (options.onPartialResult) {
554
+ options.onPartialResult(text);
555
+ }
556
+ // Resolve after first complete utterance (single-shot behavior)
557
+ cleanup();
558
+ if (options.onFinalResult) {
559
+ options.onFinalResult(text);
560
+ }
561
+ resolve(text);
562
+ return;
563
+ }
564
+ }
565
+ }
566
+ catch (error) {
567
+ cleanup();
568
+ reject(new TJBotError('Offline VAD transcription failed', { cause: error }));
569
+ }
570
+ });
571
+ micStream.on('end', () => {
572
+ cleanup();
573
+ const finalText = transcripts.join(' ');
574
+ if (options.onFinalResult) {
575
+ options.onFinalResult(finalText);
576
+ }
577
+ resolve(finalText);
578
+ });
579
+ micStream.on('error', (error) => {
580
+ cleanup();
581
+ reject(new TJBotError('Microphone stream error', { cause: error }));
582
+ });
583
+ });
584
+ }
585
+ /**
586
+ * Transcribe offline with simple energy-based silence detection
587
+ */
588
+ async transcribeOfflineEnergy(micStream, sampleRate, options) {
589
+ if (!this.recognizer) {
590
+ throw new TJBotError('Recognizer not initialized');
591
+ }
592
+ return new Promise((resolve, reject) => {
593
+ // Narrow recognizer to OfflineRecognizer for offline methods
594
+ const recognizer = this.recognizer;
595
+ const speechChunks = [];
596
+ let silenceMs = 0;
597
+ const silenceLimitMs = 700;
598
+ const rmsThreshold = 1e-4;
599
+ const transcripts = [];
600
+ const cleanup = () => {
601
+ micStream.removeAllListeners();
602
+ };
603
+ // Handle abort signal
604
+ if (options.abortSignal) {
605
+ options.abortSignal.addEventListener('abort', () => {
606
+ cleanup();
607
+ resolve(transcripts.join(' '));
608
+ });
609
+ }
610
+ micStream.on('data', (chunk) => {
611
+ try {
612
+ const samples = this.bufferToFloat32LE(chunk);
613
+ const rms = this.getRMS(samples);
614
+ const durationMs = (samples.length / sampleRate) * 1000;
615
+ if (rms > rmsThreshold) {
616
+ speechChunks.push(samples);
617
+ silenceMs = 0;
618
+ }
619
+ else {
620
+ silenceMs += durationMs;
621
+ }
622
+ if (speechChunks.length > 0 && silenceMs >= silenceLimitMs) {
623
+ // Combine speech chunks
624
+ const total = speechChunks.reduce((acc, arr) => acc + arr.length, 0);
625
+ const combined = new Float32Array(total);
626
+ let offset = 0;
627
+ for (const arr of speechChunks) {
628
+ combined.set(arr, offset);
629
+ offset += arr.length;
630
+ }
631
+ const stream = recognizer.createStream();
632
+ stream.acceptWaveform({ samples: combined, sampleRate });
633
+ recognizer.decode(stream);
634
+ const result = recognizer.getResult(stream);
635
+ const text = result.text.trim().toLowerCase();
636
+ if (text) {
637
+ transcripts.push(text);
638
+ if (options.onPartialResult) {
639
+ options.onPartialResult(text);
640
+ }
641
+ // Resolve after first complete utterance (single-shot behavior)
642
+ cleanup();
643
+ if (options.onFinalResult) {
644
+ options.onFinalResult(text);
645
+ }
646
+ resolve(text);
647
+ return;
648
+ }
649
+ speechChunks.length = 0;
650
+ silenceMs = 0;
651
+ }
652
+ }
653
+ catch (error) {
654
+ cleanup();
655
+ reject(new TJBotError('Offline energy transcription failed', { cause: error }));
656
+ }
657
+ });
658
+ micStream.on('end', () => {
659
+ cleanup();
660
+ const finalText = transcripts.join(' ');
661
+ if (options.onFinalResult) {
662
+ options.onFinalResult(finalText);
663
+ }
664
+ resolve(finalText);
665
+ });
666
+ micStream.on('error', (error) => {
667
+ cleanup();
668
+ reject(new TJBotError('Microphone stream error', { cause: error }));
669
+ });
670
+ });
671
+ }
672
+ /**
673
+ * Convert Int16 PCM buffer to Float32 samples
674
+ */
675
+ bufferToFloat32LE(buf) {
676
+ const len = buf.length / 2;
677
+ const out = new Float32Array(len);
678
+ for (let i = 0; i < len; ++i) {
679
+ out[i] = buf.readInt16LE(i * 2) / 32768;
680
+ }
681
+ return out;
682
+ }
683
+ /**
684
+ * Calculate RMS (Root Mean Square) of audio samples
685
+ */
686
+ getRMS(samples) {
687
+ let sum = 0;
688
+ for (let i = 0; i < samples.length; i++) {
689
+ sum += samples[i] * samples[i];
690
+ }
691
+ return Math.sqrt(sum / samples.length);
692
+ }
693
+ }
694
+ //# sourceMappingURL=sherpa-onnx-stt.js.map