speechflow 2.2.1 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{etc/claude.md → AGENTS.md} +8 -3
- package/CHANGELOG.md +98 -1
- package/README.md +28 -4
- package/etc/speechflow.yaml +3 -1
- package/etc/stx.conf +1 -1
- package/package.json +6 -6
- package/speechflow-cli/dst/speechflow-main-api.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-main-api.js +57 -16
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
- package/speechflow-cli/dst/speechflow-main-config.js +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +55 -21
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-nodes.js +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +6 -3
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +17 -19
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +25 -8
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +16 -13
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +7 -7
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +7 -4
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +21 -16
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +33 -11
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +4 -3
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +19 -11
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +33 -29
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +42 -23
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +13 -5
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +8 -2
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +33 -27
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +16 -5
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js +194 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +24 -10
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +22 -7
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +4 -2
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +10 -2
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +3 -0
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +160 -57
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +34 -14
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +3 -2
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +18 -7
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +27 -15
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +13 -7
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +25 -12
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +32 -20
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +84 -63
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +75 -20
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.js +5 -7
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio-wt.js +31 -5
- package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +28 -15
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-error.js +2 -2
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.js +13 -3
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +3 -2
- package/speechflow-cli/dst/speechflow-util-misc.js +63 -6
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -17
- package/speechflow-cli/dst/speechflow-util-queue.js +98 -78
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.js +35 -8
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.js +1 -1
- package/speechflow-cli/dst/speechflow.d.ts +1 -1
- package/speechflow-cli/dst/speechflow.js +1 -1
- package/speechflow-cli/etc/eslint.mjs +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/etc/stx.conf +8 -2
- package/speechflow-cli/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +2 -1
- package/speechflow-cli/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-cli/package.d/kitten-tts-js+0.1.2.patch +24 -0
- package/speechflow-cli/package.d/speex-resampler+3.0.1.patch +56 -0
- package/speechflow-cli/package.json +40 -30
- package/speechflow-cli/src/lib.d.ts +19 -1
- package/speechflow-cli/src/speechflow-main-api.ts +64 -19
- package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
- package/speechflow-cli/src/speechflow-main-config.ts +1 -1
- package/speechflow-cli/src/speechflow-main-graph.ts +56 -22
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -1
- package/speechflow-cli/src/speechflow-main-status.ts +6 -3
- package/speechflow-cli/src/speechflow-main.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +19 -20
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +31 -13
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +17 -13
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +9 -8
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +8 -4
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +22 -18
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +43 -16
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +4 -3
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +24 -12
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +10 -9
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +38 -31
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +47 -25
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +17 -6
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +12 -4
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +39 -31
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +16 -5
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-kitten.ts +178 -0
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +24 -10
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +22 -7
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +4 -2
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +10 -2
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +215 -62
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +39 -15
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -3
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +1 -1
- package/speechflow-cli/src/speechflow-node-xio-device.ts +21 -7
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +30 -16
- package/speechflow-cli/src/speechflow-node-xio-file.ts +15 -7
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +28 -15
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +35 -22
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +92 -70
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +79 -22
- package/speechflow-cli/src/speechflow-node.ts +7 -8
- package/speechflow-cli/src/speechflow-util-audio-wt.ts +46 -7
- package/speechflow-cli/src/speechflow-util-audio.ts +31 -17
- package/speechflow-cli/src/speechflow-util-error.ts +3 -3
- package/speechflow-cli/src/speechflow-util-llm.ts +14 -3
- package/speechflow-cli/src/speechflow-util-misc.ts +63 -6
- package/speechflow-cli/src/speechflow-util-queue.ts +103 -81
- package/speechflow-cli/src/speechflow-util-stream.ts +40 -8
- package/speechflow-cli/src/speechflow-util.ts +1 -1
- package/speechflow-cli/src/speechflow.ts +1 -1
- package/speechflow-ui-db/dst/index.html +1 -1
- package/speechflow-ui-db/dst/index.js +15 -15
- package/speechflow-ui-db/etc/eslint.mjs +1 -1
- package/speechflow-ui-db/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-db/etc/stx.conf +1 -1
- package/speechflow-ui-db/etc/stylelint.js +1 -1
- package/speechflow-ui-db/etc/stylelint.yaml +1 -1
- package/speechflow-ui-db/etc/vite-client.mts +1 -1
- package/speechflow-ui-db/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-db/package.json +22 -16
- package/speechflow-ui-db/src/app.styl +1 -1
- package/speechflow-ui-db/src/app.vue +1 -1
- package/speechflow-ui-db/src/index.html +1 -1
- package/speechflow-ui-db/src/index.ts +1 -1
- package/speechflow-ui-st/dst/index.html +1 -1
- package/speechflow-ui-st/dst/index.js +31 -31
- package/speechflow-ui-st/etc/eslint.mjs +1 -1
- package/speechflow-ui-st/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-st/etc/stx.conf +1 -1
- package/speechflow-ui-st/etc/stylelint.js +1 -1
- package/speechflow-ui-st/etc/stylelint.yaml +1 -1
- package/speechflow-ui-st/etc/vite-client.mts +1 -1
- package/speechflow-ui-st/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-st/package.json +23 -17
- package/speechflow-ui-st/src/app.styl +1 -1
- package/speechflow-ui-st/src/app.vue +1 -1
- package/speechflow-ui-st/src/index.html +1 -1
- package/speechflow-ui-st/src/index.ts +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -258,6 +258,9 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
258
258
|
return
|
|
259
259
|
}
|
|
260
260
|
|
|
261
|
+
/* await forthcoming audio chunks (forward declaration) */
|
|
262
|
+
let awaitForthcomingChunks: () => void = () => {}
|
|
263
|
+
|
|
261
264
|
/* flush pending audio chunks */
|
|
262
265
|
const flushPendingChunks = () => {
|
|
263
266
|
let pushed = 0
|
|
@@ -289,22 +292,22 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
289
292
|
this.push(chunk)
|
|
290
293
|
pushed++
|
|
291
294
|
}
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
/* in unplugged mode, if no chunk was pushed (all were
|
|
298
|
+
non-speech), we need to wait event-driven for new
|
|
299
|
+
data, as the stream won't call read() again until
|
|
300
|
+
we push something */
|
|
301
|
+
if (pushed === 0
|
|
302
|
+
&& !self.closing
|
|
303
|
+
&& !self.activeEventListeners.has(awaitForthcomingChunks)) {
|
|
304
|
+
self.queue.once("write", awaitForthcomingChunks)
|
|
305
|
+
self.activeEventListeners.add(awaitForthcomingChunks)
|
|
303
306
|
}
|
|
304
307
|
}
|
|
305
308
|
|
|
306
309
|
/* await forthcoming audio chunks */
|
|
307
|
-
|
|
310
|
+
awaitForthcomingChunks = () => {
|
|
308
311
|
self.activeEventListeners.delete(awaitForthcomingChunks)
|
|
309
312
|
if (self.closing)
|
|
310
313
|
return
|
|
@@ -339,16 +342,28 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
339
342
|
|
|
340
343
|
/* close node */
|
|
341
344
|
async close () {
|
|
342
|
-
/* indicate closing */
|
|
343
|
-
this.closing = true
|
|
344
|
-
|
|
345
345
|
/* cleanup tail timer */
|
|
346
346
|
if (this.tailTimer !== null) {
|
|
347
347
|
clearTimeout(this.tailTimer)
|
|
348
348
|
this.tailTimer = null
|
|
349
349
|
}
|
|
350
350
|
|
|
351
|
-
/*
|
|
351
|
+
/* flush VAD (before closing, as flush triggers callbacks which need active state) */
|
|
352
|
+
if (this.vad !== null) {
|
|
353
|
+
try {
|
|
354
|
+
const flushPromise = this.vad.flush()
|
|
355
|
+
const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
|
|
356
|
+
await Promise.race([ flushPromise, timeoutPromise ])
|
|
357
|
+
}
|
|
358
|
+
catch (error) {
|
|
359
|
+
this.log("warning", `VAD flush error during close: ${error}`)
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
/* indicate closing */
|
|
364
|
+
this.closing = true
|
|
365
|
+
|
|
366
|
+
/* remove all remaining event listeners */
|
|
352
367
|
this.activeEventListeners.forEach((listener) => {
|
|
353
368
|
this.queue.removeListener("write", listener)
|
|
354
369
|
})
|
|
@@ -360,23 +375,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
360
375
|
this.stream = null
|
|
361
376
|
}
|
|
362
377
|
|
|
363
|
-
/*
|
|
364
|
-
this.queue.pointerDelete("recv")
|
|
365
|
-
this.queue.pointerDelete("vad")
|
|
366
|
-
this.queue.pointerDelete("send")
|
|
367
|
-
|
|
368
|
-
/* close VAD */
|
|
378
|
+
/* destroy VAD */
|
|
369
379
|
if (this.vad !== null) {
|
|
370
|
-
try {
|
|
371
|
-
const flushPromise = this.vad.flush()
|
|
372
|
-
const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
|
|
373
|
-
await Promise.race([ flushPromise, timeoutPromise ])
|
|
374
|
-
}
|
|
375
|
-
catch (error) {
|
|
376
|
-
this.log("warning", `VAD flush error during close: ${error}`)
|
|
377
|
-
}
|
|
378
380
|
this.vad.destroy()
|
|
379
381
|
this.vad = null
|
|
380
382
|
}
|
|
383
|
+
|
|
384
|
+
/* cleanup queue pointers */
|
|
385
|
+
this.queue.pointerDelete("recv")
|
|
386
|
+
this.queue.pointerDelete("vad")
|
|
387
|
+
this.queue.pointerDelete("send")
|
|
381
388
|
}
|
|
382
389
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -183,9 +183,10 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
|
|
|
183
183
|
callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
|
|
184
184
|
return
|
|
185
185
|
}
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
186
|
+
const chunkNew = chunk.clone()
|
|
187
|
+
chunkNew.payload = chunk.payload.subarray(44)
|
|
188
|
+
this.push(chunkNew)
|
|
189
|
+
totalSize += chunkNew.payload.byteLength
|
|
189
190
|
callback()
|
|
190
191
|
}
|
|
191
192
|
else {
|
|
@@ -210,7 +211,7 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
|
|
|
210
211
|
sampleRate: self.config.audioSampleRate,
|
|
211
212
|
bitDepth: self.config.audioBitDepth
|
|
212
213
|
})
|
|
213
|
-
const headerChunk = headerChunkSent
|
|
214
|
+
const headerChunk = headerChunkSent.clone()
|
|
214
215
|
headerChunk.payload = headerBuffer
|
|
215
216
|
headerChunk.meta.set("chunk:seek", 0)
|
|
216
217
|
this.push(headerChunk)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -42,6 +42,7 @@ class AsyncQueue<T> {
|
|
|
42
42
|
resolve?.({ value: null, done: true })
|
|
43
43
|
}
|
|
44
44
|
this.queue.length = 0
|
|
45
|
+
this.queue.push(null)
|
|
45
46
|
}
|
|
46
47
|
async * [Symbol.asyncIterator] (): AsyncIterator<T> {
|
|
47
48
|
while (true) {
|
|
@@ -71,8 +72,9 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
71
72
|
private client: TranscribeStreamingClient | null = null
|
|
72
73
|
private clientStream: AsyncIterable<TranscriptResultStream> | null = null
|
|
73
74
|
private audioQueue: AsyncQueue<Uint8Array> | null = null
|
|
75
|
+
private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
|
|
76
|
+
private clientStreamStarting = false
|
|
74
77
|
private closing = false
|
|
75
|
-
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
76
78
|
|
|
77
79
|
/* construct node */
|
|
78
80
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -110,10 +112,11 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
110
112
|
throw new Error("Amazon Transcribe node currently supports PCM-S16LE audio only")
|
|
111
113
|
|
|
112
114
|
/* clear destruction flag */
|
|
113
|
-
this.closing
|
|
115
|
+
this.closing = false
|
|
116
|
+
this.clientStreamStarting = false
|
|
114
117
|
|
|
115
118
|
/* create queue for results */
|
|
116
|
-
this.queue = new util.
|
|
119
|
+
this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
117
120
|
|
|
118
121
|
/* create a store for the meta information */
|
|
119
122
|
const metastore = new util.TimeStore<Map<string, any>>()
|
|
@@ -136,26 +139,36 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
136
139
|
}
|
|
137
140
|
})(audioQueue)
|
|
138
141
|
|
|
142
|
+
/* provide a self-reference for use in callbacks below */
|
|
143
|
+
const self = this
|
|
144
|
+
|
|
139
145
|
/* start streaming */
|
|
140
146
|
const ensureAudioStreamActive = async () => {
|
|
141
|
-
if (this.clientStream !== null || this.closing)
|
|
147
|
+
if (this.clientStream !== null || this.clientStreamStarting || this.closing)
|
|
142
148
|
return
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
LanguageCode:
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
149
|
+
this.clientStreamStarting = true
|
|
150
|
+
try {
|
|
151
|
+
const language: LanguageCode = this.params.language === "de" ? "de-DE" : "en-US"
|
|
152
|
+
const command = new StartStreamTranscriptionCommand({
|
|
153
|
+
LanguageCode: language,
|
|
154
|
+
EnablePartialResultsStabilization: this.params.interim,
|
|
155
|
+
...(this.params.interim ? { PartialResultsStability: "low" } : {}),
|
|
156
|
+
MediaEncoding: "pcm",
|
|
157
|
+
MediaSampleRateHertz: this.config.audioSampleRate,
|
|
158
|
+
AudioStream: audioStream,
|
|
159
|
+
})
|
|
160
|
+
const response = await this.client!.send(command)
|
|
161
|
+
const stream = response.TranscriptResultStream
|
|
162
|
+
if (!stream)
|
|
163
|
+
throw new Error("no TranscriptResultStream returned")
|
|
164
|
+
this.clientStream = stream
|
|
165
|
+
}
|
|
166
|
+
catch (err) {
|
|
167
|
+
this.clientStreamStarting = false
|
|
168
|
+
throw err
|
|
169
|
+
}
|
|
157
170
|
;(async () => {
|
|
158
|
-
for await (const event of
|
|
171
|
+
for await (const event of this.clientStream!) {
|
|
159
172
|
const te = event.TranscriptEvent
|
|
160
173
|
if (!te?.Transcript?.Results)
|
|
161
174
|
continue
|
|
@@ -192,8 +205,11 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
192
205
|
this.queue?.write(chunk)
|
|
193
206
|
}
|
|
194
207
|
}
|
|
208
|
+
self.queue?.write(null)
|
|
195
209
|
})().catch((err: unknown) => {
|
|
196
210
|
this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${util.ensureError(err).message}`)
|
|
211
|
+
this.clientStream = null
|
|
212
|
+
this.clientStreamStarting = false
|
|
197
213
|
})
|
|
198
214
|
}
|
|
199
215
|
|
|
@@ -201,7 +217,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
201
217
|
this.timeOpen = DateTime.now()
|
|
202
218
|
|
|
203
219
|
/* provide Duplex stream and internally attach to Amazon Transcribe API */
|
|
204
|
-
const self = this
|
|
205
220
|
const reads = new util.PromiseSet<void>()
|
|
206
221
|
this.stream = new Stream.Duplex({
|
|
207
222
|
writableObjectMode: true,
|
|
@@ -235,12 +250,18 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
235
250
|
callback()
|
|
236
251
|
return
|
|
237
252
|
}
|
|
238
|
-
|
|
253
|
+
|
|
254
|
+
/* signal end-of-audio to Amazon Transcribe first */
|
|
255
|
+
audioQueue.push(null)
|
|
256
|
+
|
|
257
|
+
/* await all pending read operations (with safety timeout) */
|
|
258
|
+
await reads.awaitAll(5000)
|
|
259
|
+
|
|
260
|
+
/* clean up Amazon Transcribe connection and audio queue */
|
|
239
261
|
util.run("closing Amazon Transcribe connection",
|
|
240
262
|
() => self.client!.destroy(),
|
|
241
263
|
(error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
|
|
242
264
|
)
|
|
243
|
-
audioQueue.push(null) /* do not push null to stream, let Amazon Transcribe do it */
|
|
244
265
|
audioQueue.destroy()
|
|
245
266
|
callback()
|
|
246
267
|
},
|
|
@@ -259,7 +280,7 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
259
280
|
this.push(null)
|
|
260
281
|
}
|
|
261
282
|
else {
|
|
262
|
-
self.log("debug", `received data (${chunk.payload.length} bytes)
|
|
283
|
+
self.log("debug", `received data (${chunk.payload.length} bytes)`)
|
|
263
284
|
this.push(chunk)
|
|
264
285
|
}
|
|
265
286
|
}).catch((error: unknown) => {
|
|
@@ -273,7 +294,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
273
294
|
/* close node */
|
|
274
295
|
async close () {
|
|
275
296
|
/* indicate closing first to stop all async operations */
|
|
276
|
-
this.closing
|
|
297
|
+
this.closing = true
|
|
298
|
+
this.clientStreamStarting = false
|
|
277
299
|
|
|
278
300
|
/* shutdown stream */
|
|
279
301
|
if (this.stream !== null) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -24,7 +24,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
24
24
|
private dg: Deepgram.LiveClient | null = null
|
|
25
25
|
private closing = false
|
|
26
26
|
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
27
|
-
private queue: util.
|
|
27
|
+
private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
|
|
28
28
|
|
|
29
29
|
/* construct node */
|
|
30
30
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -64,7 +64,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
64
64
|
balance += balanceResponse.result.balances[0]?.amount ?? 0
|
|
65
65
|
}
|
|
66
66
|
}
|
|
67
|
-
else if (response
|
|
67
|
+
else if (response !== null && response.error !== null)
|
|
68
68
|
this.log("warning", `API error fetching projects: ${response.error}`)
|
|
69
69
|
}
|
|
70
70
|
catch (error) {
|
|
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
83
83
|
this.closing = false
|
|
84
84
|
|
|
85
85
|
/* create queue for results */
|
|
86
|
-
this.queue = new util.
|
|
86
|
+
this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
87
87
|
|
|
88
88
|
/* create a store for the meta information */
|
|
89
89
|
const metastore = new util.TimeStore<Map<string, any>>()
|
|
@@ -145,7 +145,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
145
145
|
{ word: string, punctuated_word?: string, start: number, end: number }[]
|
|
146
146
|
const isFinal = (data.is_final as boolean) ?? false
|
|
147
147
|
const speechFinal = (data.speech_final as boolean) ?? false
|
|
148
|
-
const kind = (
|
|
148
|
+
const kind = (isFinal || (endpointing > 0 && speechFinal)) ? "final" : "intermediate"
|
|
149
149
|
if (text === "")
|
|
150
150
|
this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
|
|
151
151
|
else {
|
|
@@ -206,6 +206,13 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
206
206
|
}
|
|
207
207
|
resolve(true)
|
|
208
208
|
})
|
|
209
|
+
this.dg!.once(Deepgram.LiveTranscriptionEvents.Error, (err: Error) => {
|
|
210
|
+
if (this.connectionTimeout !== null) {
|
|
211
|
+
clearTimeout(this.connectionTimeout)
|
|
212
|
+
this.connectionTimeout = null
|
|
213
|
+
}
|
|
214
|
+
reject(err)
|
|
215
|
+
})
|
|
209
216
|
})
|
|
210
217
|
|
|
211
218
|
/* remember opening time to receive time zero offset */
|
|
@@ -234,7 +241,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
234
241
|
if (chunk.meta.size > 0)
|
|
235
242
|
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
236
243
|
try {
|
|
237
|
-
|
|
244
|
+
/* send buffer (and intentionally discard all time information) */
|
|
245
|
+
self.dg.send(chunk.payload.buffer.slice(
|
|
246
|
+
chunk.payload.byteOffset,
|
|
247
|
+
chunk.payload.byteOffset + chunk.payload.byteLength
|
|
248
|
+
))
|
|
238
249
|
}
|
|
239
250
|
catch (error) {
|
|
240
251
|
callback(util.ensureError(error, "failed to send to Deepgram"))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -24,8 +24,9 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
24
24
|
/* internal state */
|
|
25
25
|
private client: GoogleSpeech.SpeechClient | null = null
|
|
26
26
|
private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
|
|
27
|
-
private queue: util.
|
|
27
|
+
private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
|
|
28
28
|
private closing = false
|
|
29
|
+
private lastResultEndMs = 0
|
|
29
30
|
|
|
30
31
|
/* construct node */
|
|
31
32
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -62,8 +63,11 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
62
63
|
/* clear destruction flag */
|
|
63
64
|
this.closing = false
|
|
64
65
|
|
|
66
|
+
/* reset result end time tracking */
|
|
67
|
+
this.lastResultEndMs = 0
|
|
68
|
+
|
|
65
69
|
/* create queue for results */
|
|
66
|
-
this.queue = new util.
|
|
70
|
+
this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
67
71
|
|
|
68
72
|
/* create a store for the meta information */
|
|
69
73
|
const metastore = new util.TimeStore<Map<string, any>>()
|
|
@@ -152,12 +156,16 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
152
156
|
/* fallback: use result timing */
|
|
153
157
|
const resultEnd = result.resultEndTime
|
|
154
158
|
if (resultEnd) {
|
|
155
|
-
|
|
159
|
+
tsStart = Duration.fromMillis(this.lastResultEndMs).plus(this.timeZeroOffset)
|
|
160
|
+
tsEnd = Duration.fromMillis(
|
|
156
161
|
(Number(resultEnd.seconds ?? 0) * 1000) +
|
|
157
162
|
(Number(resultEnd.nanos ?? 0) / 1000000)
|
|
158
163
|
).plus(this.timeZeroOffset)
|
|
159
164
|
}
|
|
160
165
|
}
|
|
166
|
+
/* track raw end time for next fallback estimation */
|
|
167
|
+
this.lastResultEndMs = tsEnd.minus(this.timeZeroOffset).toMillis()
|
|
168
|
+
|
|
161
169
|
this.log("info", `text received (start: ${tsStart.toMillis()}ms, ` +
|
|
162
170
|
`end: ${tsEnd.toMillis()}ms, ` +
|
|
163
171
|
`kind: ${isFinal ? "final" : "intermediate"}): ` +
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -25,7 +25,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
25
25
|
/* internal state */
|
|
26
26
|
private openai: OpenAI | null = null
|
|
27
27
|
private ws: ws.WebSocket | null = null
|
|
28
|
-
private queue: util.
|
|
28
|
+
private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
|
|
29
29
|
private resampler: SpeexResampler | null = null
|
|
30
30
|
private closing = false
|
|
31
31
|
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
@@ -67,7 +67,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
67
67
|
this.closing = false
|
|
68
68
|
|
|
69
69
|
/* create queue for results */
|
|
70
|
-
this.queue = new util.
|
|
70
|
+
this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
71
71
|
|
|
72
72
|
/* create a store for the meta information */
|
|
73
73
|
const metastore = new util.TimeStore<Map<string, any>>()
|
|
@@ -139,10 +139,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
139
139
|
})
|
|
140
140
|
|
|
141
141
|
/* hook onto session events */
|
|
142
|
-
this.ws.on("open", () => {
|
|
143
|
-
this.log("info", "WebSocket connection opened")
|
|
144
|
-
sendMessage({ type: "transcription.create" })
|
|
145
|
-
})
|
|
146
142
|
this.ws.on("close", () => {
|
|
147
143
|
this.log("info", "WebSocket connection closed")
|
|
148
144
|
if (!this.closing && this.queue !== null)
|
|
@@ -167,8 +163,11 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
167
163
|
}, new Map<string, any>())
|
|
168
164
|
}
|
|
169
165
|
|
|
170
|
-
/*
|
|
171
|
-
|
|
166
|
+
/* remember opening time to receive time zero offset */
|
|
167
|
+
this.timeOpen = DateTime.now()
|
|
168
|
+
|
|
169
|
+
/* track transcription text per item */
|
|
170
|
+
const textByItem = new Map<string, string>()
|
|
172
171
|
this.ws.on("message", (data) => {
|
|
173
172
|
let ev: Record<string, unknown>
|
|
174
173
|
try {
|
|
@@ -186,13 +185,16 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
186
185
|
case "transcription_session.created":
|
|
187
186
|
break
|
|
188
187
|
case "conversation.item.created": {
|
|
189
|
-
|
|
188
|
+
const itemId = (ev.item as Record<string, unknown>)?.id as string
|
|
189
|
+
if (itemId)
|
|
190
|
+
textByItem.set(itemId, "")
|
|
190
191
|
break
|
|
191
192
|
}
|
|
192
193
|
case "conversation.item.input_audio_transcription.delta": {
|
|
193
|
-
|
|
194
|
+
const itemId = ev.item_id as string
|
|
195
|
+
const text = (textByItem.get(itemId) ?? "") + (ev.delta as string)
|
|
196
|
+
textByItem.set(itemId, text)
|
|
194
197
|
if (this.params.interim && !this.closing && this.queue !== null) {
|
|
195
|
-
const itemId = ev.item_id as string
|
|
196
198
|
const timing = speechTiming.get(itemId)
|
|
197
199
|
const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
|
|
198
200
|
const end = timing !== undefined ? Duration.fromMillis(timing.endMs) : start
|
|
@@ -204,7 +206,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
204
206
|
}
|
|
205
207
|
case "conversation.item.input_audio_transcription.completed": {
|
|
206
208
|
if (!this.closing && this.queue !== null) {
|
|
207
|
-
text
|
|
209
|
+
const text = ev.transcript as string
|
|
208
210
|
const itemId = ev.item_id as string
|
|
209
211
|
const timing = speechTiming.get(itemId)
|
|
210
212
|
const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
|
|
@@ -213,8 +215,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
213
215
|
chunk.meta = aggregateMeta(start, end)
|
|
214
216
|
metastore.prune(start)
|
|
215
217
|
speechTiming.delete(itemId)
|
|
218
|
+
textByItem.delete(itemId)
|
|
216
219
|
this.queue.write(chunk)
|
|
217
|
-
text = ""
|
|
218
220
|
}
|
|
219
221
|
break
|
|
220
222
|
}
|
|
@@ -248,9 +250,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
248
250
|
}
|
|
249
251
|
})
|
|
250
252
|
|
|
251
|
-
/* remember opening time to receive time zero offset */
|
|
252
|
-
this.timeOpen = DateTime.now()
|
|
253
|
-
|
|
254
253
|
/* provide Duplex stream and internally attach to OpenAI API */
|
|
255
254
|
const self = this
|
|
256
255
|
const reads = new util.PromiseSet<void>()
|
|
@@ -260,7 +259,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
260
259
|
decodeStrings: false,
|
|
261
260
|
highWaterMark: 1,
|
|
262
261
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
263
|
-
if (self.closing || self.ws === null) {
|
|
262
|
+
if (self.closing || self.ws === null || self.resampler === null) {
|
|
264
263
|
callback(new Error("stream already destroyed"))
|
|
265
264
|
return
|
|
266
265
|
}
|
|
@@ -274,7 +273,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
274
273
|
if (chunk.meta.size > 0)
|
|
275
274
|
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
276
275
|
try {
|
|
277
|
-
const payload = self.resampler
|
|
276
|
+
const payload = self.resampler.processChunk(chunk.payload)
|
|
278
277
|
const audioB64 = payload.toString("base64")
|
|
279
278
|
sendMessage({
|
|
280
279
|
type: "input_audio_buffer.append",
|
|
@@ -296,17 +295,23 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
296
295
|
}
|
|
297
296
|
try {
|
|
298
297
|
sendMessage({ type: "input_audio_buffer.commit" })
|
|
299
|
-
self.ws
|
|
300
|
-
await
|
|
298
|
+
self.ws?.close()
|
|
299
|
+
await new Promise<void>((resolve) => {
|
|
300
|
+
const timeout = setTimeout(() => { resolve() }, 5000)
|
|
301
|
+
self.ws?.once("close", () => {
|
|
302
|
+
clearTimeout(timeout)
|
|
303
|
+
resolve()
|
|
304
|
+
})
|
|
305
|
+
})
|
|
301
306
|
}
|
|
302
307
|
catch (error) {
|
|
303
308
|
self.log("warning", `error closing OpenAI connection: ${error}`)
|
|
304
309
|
}
|
|
310
|
+
|
|
311
|
+
/* await all read operations */
|
|
305
312
|
await reads.awaitAll()
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
this.push(chunk)
|
|
309
|
-
this.push(null)
|
|
313
|
+
|
|
314
|
+
/* NOTICE: do not push null here -- let the WebSocket close event handle it */
|
|
310
315
|
callback()
|
|
311
316
|
},
|
|
312
317
|
read (size) {
|
|
@@ -346,6 +351,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
346
351
|
this.connectionTimeout = null
|
|
347
352
|
}
|
|
348
353
|
|
|
354
|
+
/* shutdown stream */
|
|
355
|
+
if (this.stream !== null) {
|
|
356
|
+
await util.destroyStream(this.stream)
|
|
357
|
+
this.stream = null
|
|
358
|
+
}
|
|
359
|
+
|
|
349
360
|
/* signal EOF to any pending read operations */
|
|
350
361
|
if (this.queue !== null) {
|
|
351
362
|
this.queue.write(null)
|
|
@@ -362,12 +373,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
362
373
|
this.openai = null
|
|
363
374
|
|
|
364
375
|
/* close resampler */
|
|
365
|
-
this.resampler
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
if (this.stream !== null) {
|
|
369
|
-
await util.destroyStream(this.stream)
|
|
370
|
-
this.stream = null
|
|
376
|
+
if (this.resampler !== null) {
|
|
377
|
+
this.resampler.destroy()
|
|
378
|
+
this.resampler = null
|
|
371
379
|
}
|
|
372
380
|
}
|
|
373
381
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -131,9 +131,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
131
131
|
else if (chunk.payload === "")
|
|
132
132
|
callback()
|
|
133
133
|
else {
|
|
134
|
+
let callbackCalled = false
|
|
134
135
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
135
136
|
processTimeout = null
|
|
136
|
-
|
|
137
|
+
if (!callbackCalled) {
|
|
138
|
+
callbackCalled = true
|
|
139
|
+
callback(new Error("AWS Polly API timeout"))
|
|
140
|
+
}
|
|
137
141
|
}, 60 * 1000)
|
|
138
142
|
const clearProcessTimeout = () => {
|
|
139
143
|
if (processTimeout !== null) {
|
|
@@ -143,8 +147,11 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
143
147
|
}
|
|
144
148
|
self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
|
|
145
149
|
textToSpeech(chunk.payload as string).then((buffer) => {
|
|
150
|
+
clearProcessTimeout()
|
|
151
|
+
if (callbackCalled)
|
|
152
|
+
return
|
|
153
|
+
callbackCalled = true
|
|
146
154
|
if (self.closing) {
|
|
147
|
-
clearProcessTimeout()
|
|
148
155
|
callback(new Error("stream destroyed during processing"))
|
|
149
156
|
return
|
|
150
157
|
}
|
|
@@ -157,11 +164,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
157
164
|
chunkNew.type = "audio"
|
|
158
165
|
chunkNew.payload = buffer
|
|
159
166
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
160
|
-
clearProcessTimeout()
|
|
161
167
|
this.push(chunkNew)
|
|
162
168
|
callback()
|
|
163
169
|
}).catch((error: unknown) => {
|
|
164
170
|
clearProcessTimeout()
|
|
171
|
+
if (callbackCalled)
|
|
172
|
+
return
|
|
173
|
+
callbackCalled = true
|
|
165
174
|
callback(util.ensureError(error, "AWS Polly processing failed"))
|
|
166
175
|
})
|
|
167
176
|
}
|
|
@@ -184,8 +193,10 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
184
193
|
}
|
|
185
194
|
|
|
186
195
|
/* destroy resampler */
|
|
187
|
-
if (this.resampler !== null)
|
|
196
|
+
if (this.resampler !== null) {
|
|
197
|
+
this.resampler.destroy()
|
|
188
198
|
this.resampler = null
|
|
199
|
+
}
|
|
189
200
|
|
|
190
201
|
/* destroy AWS Polly API */
|
|
191
202
|
if (this.client !== null) {
|