speechflow 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{etc/claude.md → AGENTS.md} +8 -3
- package/CHANGELOG.md +70 -1
- package/README.md +28 -4
- package/etc/speechflow.yaml +3 -1
- package/etc/stx.conf +1 -1
- package/package.json +6 -6
- package/speechflow-cli/dst/speechflow-main-api.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-main-api.js +57 -16
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
- package/speechflow-cli/dst/speechflow-main-config.js +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +55 -21
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-nodes.js +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +6 -3
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +7 -10
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +8 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +9 -5
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -4
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +20 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +33 -11
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +4 -3
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +19 -11
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +33 -29
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +34 -20
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +13 -5
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +3 -2
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +33 -27
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +16 -5
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js +194 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +21 -9
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +21 -7
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +4 -2
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +34 -14
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +3 -2
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +18 -7
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +23 -11
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +13 -7
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +25 -12
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +32 -20
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +78 -62
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +63 -18
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.js +5 -7
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio-wt.js +31 -5
- package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +25 -14
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-error.js +2 -2
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.js +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +3 -2
- package/speechflow-cli/dst/speechflow-util-misc.js +63 -6
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +5 -17
- package/speechflow-cli/dst/speechflow-util-queue.js +57 -78
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.js +35 -8
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.js +1 -1
- package/speechflow-cli/dst/speechflow.d.ts +1 -1
- package/speechflow-cli/dst/speechflow.js +1 -1
- package/speechflow-cli/etc/eslint.mjs +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/etc/stx.conf +8 -2
- package/speechflow-cli/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +2 -1
- package/speechflow-cli/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-cli/package.d/kitten-tts-js+0.1.2.patch +24 -0
- package/speechflow-cli/package.d/speex-resampler+3.0.1.patch +56 -0
- package/speechflow-cli/package.json +40 -30
- package/speechflow-cli/src/lib.d.ts +19 -1
- package/speechflow-cli/src/speechflow-main-api.ts +64 -19
- package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
- package/speechflow-cli/src/speechflow-main-config.ts +1 -1
- package/speechflow-cli/src/speechflow-main-graph.ts +56 -22
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -1
- package/speechflow-cli/src/speechflow-main-status.ts +6 -3
- package/speechflow-cli/src/speechflow-main.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +7 -11
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -6
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +10 -5
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -4
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +43 -16
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +4 -3
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +24 -12
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +10 -9
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +38 -31
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +35 -22
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +17 -6
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +5 -4
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +39 -31
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +16 -5
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-kitten.ts +178 -0
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +21 -9
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +21 -7
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +4 -2
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +39 -15
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -3
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +1 -1
- package/speechflow-cli/src/speechflow-node-xio-device.ts +21 -7
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +25 -11
- package/speechflow-cli/src/speechflow-node-xio-file.ts +15 -7
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +28 -15
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +35 -22
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +85 -69
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +67 -20
- package/speechflow-cli/src/speechflow-node.ts +7 -8
- package/speechflow-cli/src/speechflow-util-audio-wt.ts +46 -7
- package/speechflow-cli/src/speechflow-util-audio.ts +27 -15
- package/speechflow-cli/src/speechflow-util-error.ts +3 -3
- package/speechflow-cli/src/speechflow-util-llm.ts +1 -1
- package/speechflow-cli/src/speechflow-util-misc.ts +63 -6
- package/speechflow-cli/src/speechflow-util-queue.ts +60 -81
- package/speechflow-cli/src/speechflow-util-stream.ts +40 -8
- package/speechflow-cli/src/speechflow-util.ts +1 -1
- package/speechflow-cli/src/speechflow.ts +1 -1
- package/speechflow-ui-db/dst/index.html +1 -1
- package/speechflow-ui-db/dst/index.js +15 -15
- package/speechflow-ui-db/etc/eslint.mjs +1 -1
- package/speechflow-ui-db/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-db/etc/stx.conf +1 -1
- package/speechflow-ui-db/etc/stylelint.js +1 -1
- package/speechflow-ui-db/etc/stylelint.yaml +1 -1
- package/speechflow-ui-db/etc/vite-client.mts +1 -1
- package/speechflow-ui-db/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-db/package.json +22 -16
- package/speechflow-ui-db/src/app.styl +1 -1
- package/speechflow-ui-db/src/app.vue +1 -1
- package/speechflow-ui-db/src/index.html +1 -1
- package/speechflow-ui-db/src/index.ts +1 -1
- package/speechflow-ui-st/dst/index.html +1 -1
- package/speechflow-ui-st/dst/index.js +31 -31
- package/speechflow-ui-st/etc/eslint.mjs +1 -1
- package/speechflow-ui-st/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-st/etc/stx.conf +1 -1
- package/speechflow-ui-st/etc/stylelint.js +1 -1
- package/speechflow-ui-st/etc/stylelint.yaml +1 -1
- package/speechflow-ui-st/etc/vite-client.mts +1 -1
- package/speechflow-ui-st/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-st/package.json +23 -17
- package/speechflow-ui-st/src/app.styl +1 -1
- package/speechflow-ui-st/src/app.vue +1 -1
- package/speechflow-ui-st/src/index.html +1 -1
- package/speechflow-ui-st/src/index.ts +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -25,7 +25,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
25
25
|
/* internal state */
|
|
26
26
|
private openai: OpenAI | null = null
|
|
27
27
|
private ws: ws.WebSocket | null = null
|
|
28
|
-
private queue: util.
|
|
28
|
+
private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
|
|
29
29
|
private resampler: SpeexResampler | null = null
|
|
30
30
|
private closing = false
|
|
31
31
|
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
@@ -67,7 +67,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
67
67
|
this.closing = false
|
|
68
68
|
|
|
69
69
|
/* create queue for results */
|
|
70
|
-
this.queue = new util.
|
|
70
|
+
this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
71
71
|
|
|
72
72
|
/* create a store for the meta information */
|
|
73
73
|
const metastore = new util.TimeStore<Map<string, any>>()
|
|
@@ -139,10 +139,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
139
139
|
})
|
|
140
140
|
|
|
141
141
|
/* hook onto session events */
|
|
142
|
-
this.ws.on("open", () => {
|
|
143
|
-
this.log("info", "WebSocket connection opened")
|
|
144
|
-
sendMessage({ type: "transcription.create" })
|
|
145
|
-
})
|
|
146
142
|
this.ws.on("close", () => {
|
|
147
143
|
this.log("info", "WebSocket connection closed")
|
|
148
144
|
if (!this.closing && this.queue !== null)
|
|
@@ -167,8 +163,11 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
167
163
|
}, new Map<string, any>())
|
|
168
164
|
}
|
|
169
165
|
|
|
170
|
-
/*
|
|
171
|
-
|
|
166
|
+
/* remember opening time to receive time zero offset */
|
|
167
|
+
this.timeOpen = DateTime.now()
|
|
168
|
+
|
|
169
|
+
/* track transcription text per item */
|
|
170
|
+
const textByItem = new Map<string, string>()
|
|
172
171
|
this.ws.on("message", (data) => {
|
|
173
172
|
let ev: Record<string, unknown>
|
|
174
173
|
try {
|
|
@@ -186,13 +185,16 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
186
185
|
case "transcription_session.created":
|
|
187
186
|
break
|
|
188
187
|
case "conversation.item.created": {
|
|
189
|
-
|
|
188
|
+
const itemId = (ev.item as Record<string, unknown>)?.id as string
|
|
189
|
+
if (itemId)
|
|
190
|
+
textByItem.set(itemId, "")
|
|
190
191
|
break
|
|
191
192
|
}
|
|
192
193
|
case "conversation.item.input_audio_transcription.delta": {
|
|
193
|
-
|
|
194
|
+
const itemId = ev.item_id as string
|
|
195
|
+
const text = (textByItem.get(itemId) ?? "") + (ev.delta as string)
|
|
196
|
+
textByItem.set(itemId, text)
|
|
194
197
|
if (this.params.interim && !this.closing && this.queue !== null) {
|
|
195
|
-
const itemId = ev.item_id as string
|
|
196
198
|
const timing = speechTiming.get(itemId)
|
|
197
199
|
const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
|
|
198
200
|
const end = timing !== undefined ? Duration.fromMillis(timing.endMs) : start
|
|
@@ -204,7 +206,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
204
206
|
}
|
|
205
207
|
case "conversation.item.input_audio_transcription.completed": {
|
|
206
208
|
if (!this.closing && this.queue !== null) {
|
|
207
|
-
text
|
|
209
|
+
const text = ev.transcript as string
|
|
208
210
|
const itemId = ev.item_id as string
|
|
209
211
|
const timing = speechTiming.get(itemId)
|
|
210
212
|
const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
|
|
@@ -213,8 +215,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
213
215
|
chunk.meta = aggregateMeta(start, end)
|
|
214
216
|
metastore.prune(start)
|
|
215
217
|
speechTiming.delete(itemId)
|
|
218
|
+
textByItem.delete(itemId)
|
|
216
219
|
this.queue.write(chunk)
|
|
217
|
-
text = ""
|
|
218
220
|
}
|
|
219
221
|
break
|
|
220
222
|
}
|
|
@@ -248,9 +250,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
248
250
|
}
|
|
249
251
|
})
|
|
250
252
|
|
|
251
|
-
/* remember opening time to receive time zero offset */
|
|
252
|
-
this.timeOpen = DateTime.now()
|
|
253
|
-
|
|
254
253
|
/* provide Duplex stream and internally attach to OpenAI API */
|
|
255
254
|
const self = this
|
|
256
255
|
const reads = new util.PromiseSet<void>()
|
|
@@ -260,7 +259,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
260
259
|
decodeStrings: false,
|
|
261
260
|
highWaterMark: 1,
|
|
262
261
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
263
|
-
if (self.closing || self.ws === null) {
|
|
262
|
+
if (self.closing || self.ws === null || self.resampler === null) {
|
|
264
263
|
callback(new Error("stream already destroyed"))
|
|
265
264
|
return
|
|
266
265
|
}
|
|
@@ -274,7 +273,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
274
273
|
if (chunk.meta.size > 0)
|
|
275
274
|
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
276
275
|
try {
|
|
277
|
-
const payload = self.resampler
|
|
276
|
+
const payload = self.resampler.processChunk(chunk.payload)
|
|
278
277
|
const audioB64 = payload.toString("base64")
|
|
279
278
|
sendMessage({
|
|
280
279
|
type: "input_audio_buffer.append",
|
|
@@ -296,17 +295,23 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
296
295
|
}
|
|
297
296
|
try {
|
|
298
297
|
sendMessage({ type: "input_audio_buffer.commit" })
|
|
299
|
-
self.ws
|
|
300
|
-
await
|
|
298
|
+
self.ws?.close()
|
|
299
|
+
await new Promise<void>((resolve) => {
|
|
300
|
+
const timeout = setTimeout(() => { resolve() }, 5000)
|
|
301
|
+
self.ws?.once("close", () => {
|
|
302
|
+
clearTimeout(timeout)
|
|
303
|
+
resolve()
|
|
304
|
+
})
|
|
305
|
+
})
|
|
301
306
|
}
|
|
302
307
|
catch (error) {
|
|
303
308
|
self.log("warning", `error closing OpenAI connection: ${error}`)
|
|
304
309
|
}
|
|
310
|
+
|
|
311
|
+
/* await all read operations */
|
|
305
312
|
await reads.awaitAll()
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
this.push(chunk)
|
|
309
|
-
this.push(null)
|
|
313
|
+
|
|
314
|
+
/* NOTICE: do not push null here -- let the WebSocket close event handle it */
|
|
310
315
|
callback()
|
|
311
316
|
},
|
|
312
317
|
read (size) {
|
|
@@ -346,6 +351,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
346
351
|
this.connectionTimeout = null
|
|
347
352
|
}
|
|
348
353
|
|
|
354
|
+
/* shutdown stream */
|
|
355
|
+
if (this.stream !== null) {
|
|
356
|
+
await util.destroyStream(this.stream)
|
|
357
|
+
this.stream = null
|
|
358
|
+
}
|
|
359
|
+
|
|
349
360
|
/* signal EOF to any pending read operations */
|
|
350
361
|
if (this.queue !== null) {
|
|
351
362
|
this.queue.write(null)
|
|
@@ -362,12 +373,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
362
373
|
this.openai = null
|
|
363
374
|
|
|
364
375
|
/* close resampler */
|
|
365
|
-
this.resampler
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
if (this.stream !== null) {
|
|
369
|
-
await util.destroyStream(this.stream)
|
|
370
|
-
this.stream = null
|
|
376
|
+
if (this.resampler !== null) {
|
|
377
|
+
this.resampler.destroy()
|
|
378
|
+
this.resampler = null
|
|
371
379
|
}
|
|
372
380
|
}
|
|
373
381
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -131,9 +131,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
131
131
|
else if (chunk.payload === "")
|
|
132
132
|
callback()
|
|
133
133
|
else {
|
|
134
|
+
let callbackCalled = false
|
|
134
135
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
135
136
|
processTimeout = null
|
|
136
|
-
|
|
137
|
+
if (!callbackCalled) {
|
|
138
|
+
callbackCalled = true
|
|
139
|
+
callback(new Error("AWS Polly API timeout"))
|
|
140
|
+
}
|
|
137
141
|
}, 60 * 1000)
|
|
138
142
|
const clearProcessTimeout = () => {
|
|
139
143
|
if (processTimeout !== null) {
|
|
@@ -143,8 +147,11 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
143
147
|
}
|
|
144
148
|
self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
|
|
145
149
|
textToSpeech(chunk.payload as string).then((buffer) => {
|
|
150
|
+
clearProcessTimeout()
|
|
151
|
+
if (callbackCalled)
|
|
152
|
+
return
|
|
153
|
+
callbackCalled = true
|
|
146
154
|
if (self.closing) {
|
|
147
|
-
clearProcessTimeout()
|
|
148
155
|
callback(new Error("stream destroyed during processing"))
|
|
149
156
|
return
|
|
150
157
|
}
|
|
@@ -157,11 +164,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
157
164
|
chunkNew.type = "audio"
|
|
158
165
|
chunkNew.payload = buffer
|
|
159
166
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
160
|
-
clearProcessTimeout()
|
|
161
167
|
this.push(chunkNew)
|
|
162
168
|
callback()
|
|
163
169
|
}).catch((error: unknown) => {
|
|
164
170
|
clearProcessTimeout()
|
|
171
|
+
if (callbackCalled)
|
|
172
|
+
return
|
|
173
|
+
callbackCalled = true
|
|
165
174
|
callback(util.ensureError(error, "AWS Polly processing failed"))
|
|
166
175
|
})
|
|
167
176
|
}
|
|
@@ -184,8 +193,10 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
184
193
|
}
|
|
185
194
|
|
|
186
195
|
/* destroy resampler */
|
|
187
|
-
if (this.resampler !== null)
|
|
196
|
+
if (this.resampler !== null) {
|
|
197
|
+
this.resampler.destroy()
|
|
188
198
|
this.resampler = null
|
|
199
|
+
}
|
|
189
200
|
|
|
190
201
|
/* destroy AWS Polly API */
|
|
191
202
|
if (this.client !== null) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -150,9 +150,13 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
150
150
|
else if (chunk.payload === "")
|
|
151
151
|
callback()
|
|
152
152
|
else {
|
|
153
|
+
let callbackCalled = false
|
|
153
154
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
154
155
|
processTimeout = null
|
|
155
|
-
|
|
156
|
+
if (!callbackCalled) {
|
|
157
|
+
callbackCalled = true
|
|
158
|
+
callback(new Error("ElevenLabs API timeout"))
|
|
159
|
+
}
|
|
156
160
|
}, 60 * 1000)
|
|
157
161
|
const clearProcessTimeout = () => {
|
|
158
162
|
if (processTimeout !== null) {
|
|
@@ -163,13 +167,17 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
163
167
|
try {
|
|
164
168
|
if (self.closing) {
|
|
165
169
|
clearProcessTimeout()
|
|
170
|
+
callbackCalled = true
|
|
166
171
|
callback(new Error("stream destroyed during processing"))
|
|
167
172
|
return
|
|
168
173
|
}
|
|
169
174
|
const stream = await speechStream(chunk.payload as string)
|
|
170
175
|
const buffer = await getStreamAsBuffer(stream)
|
|
176
|
+
clearProcessTimeout()
|
|
177
|
+
if (callbackCalled)
|
|
178
|
+
return
|
|
179
|
+
callbackCalled = true
|
|
171
180
|
if (self.closing) {
|
|
172
|
-
clearProcessTimeout()
|
|
173
181
|
callback(new Error("stream destroyed during processing"))
|
|
174
182
|
return
|
|
175
183
|
}
|
|
@@ -187,12 +195,14 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
187
195
|
chunkNew.type = "audio"
|
|
188
196
|
chunkNew.payload = bufferResampled
|
|
189
197
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
190
|
-
clearProcessTimeout()
|
|
191
198
|
this.push(chunkNew)
|
|
192
199
|
callback()
|
|
193
200
|
}
|
|
194
201
|
catch (error) {
|
|
195
202
|
clearProcessTimeout()
|
|
203
|
+
if (callbackCalled)
|
|
204
|
+
return
|
|
205
|
+
callbackCalled = true
|
|
196
206
|
callback(util.ensureError(error, "ElevenLabs processing failed"))
|
|
197
207
|
}
|
|
198
208
|
}
|
|
@@ -215,8 +225,10 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
215
225
|
}
|
|
216
226
|
|
|
217
227
|
/* destroy resampler */
|
|
218
|
-
if (this.resampler !== null)
|
|
228
|
+
if (this.resampler !== null) {
|
|
229
|
+
this.resampler.destroy()
|
|
219
230
|
this.resampler = null
|
|
231
|
+
}
|
|
220
232
|
|
|
221
233
|
/* destroy ElevenLabs API */
|
|
222
234
|
if (this.elevenlabs !== null)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -129,9 +129,13 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
129
129
|
else if (chunk.payload === "")
|
|
130
130
|
callback()
|
|
131
131
|
else {
|
|
132
|
+
let callbackCalled = false
|
|
132
133
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
133
134
|
processTimeout = null
|
|
134
|
-
|
|
135
|
+
if (!callbackCalled) {
|
|
136
|
+
callbackCalled = true
|
|
137
|
+
callback(new Error("Google TTS API timeout"))
|
|
138
|
+
}
|
|
135
139
|
}, 60 * 1000)
|
|
136
140
|
const clearProcessTimeout = () => {
|
|
137
141
|
if (processTimeout !== null) {
|
|
@@ -142,12 +146,16 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
142
146
|
try {
|
|
143
147
|
if (self.closing) {
|
|
144
148
|
clearProcessTimeout()
|
|
149
|
+
callbackCalled = true
|
|
145
150
|
callback(new Error("stream destroyed during processing"))
|
|
146
151
|
return
|
|
147
152
|
}
|
|
148
153
|
const buffer = await textToSpeech(chunk.payload as string)
|
|
154
|
+
clearProcessTimeout()
|
|
155
|
+
if (callbackCalled)
|
|
156
|
+
return
|
|
157
|
+
callbackCalled = true
|
|
149
158
|
if (self.closing) {
|
|
150
|
-
clearProcessTimeout()
|
|
151
159
|
callback(new Error("stream destroyed during processing"))
|
|
152
160
|
return
|
|
153
161
|
}
|
|
@@ -161,12 +169,14 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
161
169
|
chunkNew.type = "audio"
|
|
162
170
|
chunkNew.payload = buffer
|
|
163
171
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
164
|
-
clearProcessTimeout()
|
|
165
172
|
this.push(chunkNew)
|
|
166
173
|
callback()
|
|
167
174
|
}
|
|
168
175
|
catch (error) {
|
|
169
176
|
clearProcessTimeout()
|
|
177
|
+
if (callbackCalled)
|
|
178
|
+
return
|
|
179
|
+
callbackCalled = true
|
|
170
180
|
callback(util.ensureError(error, "Google TTS processing failed"))
|
|
171
181
|
}
|
|
172
182
|
}
|
|
@@ -189,8 +199,10 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
189
199
|
}
|
|
190
200
|
|
|
191
201
|
/* destroy resampler */
|
|
192
|
-
if (this.resampler !== null)
|
|
202
|
+
if (this.resampler !== null) {
|
|
203
|
+
this.resampler.destroy()
|
|
193
204
|
this.resampler = null
|
|
205
|
+
}
|
|
194
206
|
|
|
195
207
|
/* destroy Google TTS client */
|
|
196
208
|
if (this.client !== null) {
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import Stream from "node:stream"
|
|
9
|
+
|
|
10
|
+
/* external dependencies */
|
|
11
|
+
import { KittenTTS } from "kitten-tts-js"
|
|
12
|
+
import { Duration } from "luxon"
|
|
13
|
+
import SpeexResampler from "speex-resampler"
|
|
14
|
+
|
|
15
|
+
/* internal dependencies */
|
|
16
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
17
|
+
import * as util from "./speechflow-util"
|
|
18
|
+
|
|
19
|
+
/*  SpeechFlow node for Kitten text-to-speech conversion.
    Accepts text chunks and emits PCM/I16 audio chunks at the graph's
    standard sample rate, synthesized locally via the Kitten TTS model.  */
export default class SpeechFlowNodeT2AKitten extends SpeechFlowNode {
    /* declare official node name */
    public static name = "t2a-kitten"

    /* internal state */
    private kitten: KittenTTS | null = null          /* Kitten TTS engine instance       */
    private resampler: SpeexResampler | null = null  /* 24KHz -> configured-rate resampler */
    private closing = false                          /* set by close() to reject late work */

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters:
           - model: Kitten TTS model identifier (any non-empty string)
           - voice: one of the eight voices Kitten TTS ships with
           - speed: speaking-rate multiplier, clamped to [0.5, 2.0] */
        this.configure({
            model: { type: "string", val: "KittenML/kitten-tts-nano-0.8", pos: 0, match: /^.+$/ },
            voice: { type: "string", val: "Bruno", pos: 1, match: /^(?:Bella|Jasper|Luna|Bruno|Rosie|Hugo|Kiki|Leo)$/ },
            speed: { type: "number", val: 1.25, pos: 2, match: (n: number) => n >= 0.5 && n <= 2.0 }
        })

        /* declare node input/output format: text in, audio out */
        this.input = "text"
        this.output = "audio"
    }

    /* one-time status of node (no status information to report) */
    async status () {
        return {}
    }

    /* open node */
    async open () {
        /* clear destruction flag (node may be re-opened after close) */
        this.closing = false

        /* establish Kitten TTS */
        this.kitten = await KittenTTS.from_pretrained(this.params.model)
        if (this.kitten === null)
            throw new Error("failed to instantiate Kitten TTS")

        /* establish resampler from Kitten's 24Khz
           output to our standard audio sample rate (48KHz) */
        this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)

        /* perform text-to-speech operation with Kitten TTS API:
           text -> PCM/F32/24KHz -> PCM/I16/24KHz -> PCM/I16 at configured rate */
        const text2speech = async (text: string) => {
            this.log("info", `Kitten TTS: input: "${text}"`)
            const audio = await this.kitten!.generate(text, {
                voice: this.params.voice,
                speed: this.params.speed
            })
            if (audio.sampling_rate !== 24000)
                throw new Error("expected 24KHz sampling rate in Kitten TTS output")

            /* convert audio samples from PCM/F32/24Khz to PCM/I16/24KHz
               (each float sample is clamped to [-1, 1] and scaled to 16-bit range;
               NOTE(review): sample * 0x7FFF is truncated, not rounded, when written
               — consider Math.round for slightly lower quantization error) */
            const samples = audio.data
            const buffer1 = Buffer.alloc(samples.length * 2)
            for (let i = 0; i < samples.length; i++) {
                const sample = Math.max(-1, Math.min(1, samples[i]))
                buffer1.writeInt16LE(sample * 0x7FFF, i * 2)
            }

            /* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz
               (guard: close() may have destroyed the resampler meanwhile) */
            if (this.resampler === null)
                throw new Error("resampler already destroyed")
            return this.resampler.processChunk(buffer1)
        }

        /* create transform stream and connect it to the Kitten TTS API */
        const self = this
        this.stream = new Stream.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            highWaterMark: 1,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                if (self.closing)
                    callback(new Error("stream already destroyed"))
                else if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else if (chunk.payload === "")
                    callback()  /* nothing to synthesize for empty text */
                else {
                    /* guard against calling the stream callback more than once:
                       the 60s watchdog timeout, the success path and the error
                       path below can each fire, but only the first one wins */
                    let callbackCalled = false
                    let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
                        processTimeout = null
                        if (!callbackCalled) {
                            callbackCalled = true
                            callback(new Error("Kitten TTS timeout"))
                        }
                    }, 60 * 1000)
                    const clearProcessTimeout = () => {
                        if (processTimeout !== null) {
                            clearTimeout(processTimeout)
                            processTimeout = null
                        }
                    }
                    text2speech(chunk.payload).then((buffer) => {
                        clearProcessTimeout()
                        if (callbackCalled)
                            return  /* watchdog already reported a timeout */
                        callbackCalled = true
                        if (self.closing) {
                            callback(new Error("stream destroyed during processing"))
                            return
                        }
                        self.log("info", `Kitten TTS: received audio (buffer length: ${buffer.byteLength})`)

                        /* calculate actual audio duration from PCM buffer size */
                        const durationMs = util.audioBufferDuration(buffer,
                            self.config.audioSampleRate, self.config.audioBitDepth) * 1000

                        /* create new chunk with recalculated timestamps
                           (end = start + synthesized audio duration) */
                        const chunkNew = chunk.clone()
                        chunkNew.type = "audio"
                        chunkNew.payload = buffer
                        chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
                        this.push(chunkNew)
                        callback()
                    }).catch((error: unknown) => {
                        clearProcessTimeout()
                        if (callbackCalled)
                            return  /* watchdog already reported a timeout */
                        callbackCalled = true
                        callback(util.ensureError(error, "Kitten TTS processing failed"))
                    })
                }
            },
            final (callback) {
                callback()  /* no buffered state to flush */
            }
        })
    }

    /* close node */
    async close () {
        /* indicate closing (makes in-flight transform callbacks bail out) */
        this.closing = true

        /* shutdown stream */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }

        /* destroy resampler */
        if (this.resampler !== null) {
            this.resampler.destroy()
            this.resampler = null
        }

        /* destroy Kitten TTS API */
        if (this.kitten !== null) {
            await this.kitten.release()
            this.kitten = null
        }
    }
}
|
|
178
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -81,11 +81,12 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
81
81
|
this.kokoro = await KokoroTTS.from_pretrained(model, {
|
|
82
82
|
dtype: "q4f16",
|
|
83
83
|
progress_callback: progressCallback
|
|
84
|
+
}).finally(() => {
|
|
85
|
+
if (interval !== null) {
|
|
86
|
+
clearInterval(interval)
|
|
87
|
+
interval = null
|
|
88
|
+
}
|
|
84
89
|
})
|
|
85
|
-
if (interval !== null) {
|
|
86
|
-
clearInterval(interval)
|
|
87
|
-
interval = null
|
|
88
|
-
}
|
|
89
90
|
if (this.kokoro === null)
|
|
90
91
|
throw new Error("failed to instantiate Kokoro")
|
|
91
92
|
|
|
@@ -141,9 +142,13 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
141
142
|
else if (chunk.payload === "")
|
|
142
143
|
callback()
|
|
143
144
|
else {
|
|
145
|
+
let callbackCalled = false
|
|
144
146
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
145
147
|
processTimeout = null
|
|
146
|
-
|
|
148
|
+
if (!callbackCalled) {
|
|
149
|
+
callbackCalled = true
|
|
150
|
+
callback(new Error("Kokoro TTS timeout"))
|
|
151
|
+
}
|
|
147
152
|
}, 60 * 1000)
|
|
148
153
|
const clearProcessTimeout = () => {
|
|
149
154
|
if (processTimeout !== null) {
|
|
@@ -152,8 +157,11 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
152
157
|
}
|
|
153
158
|
}
|
|
154
159
|
text2speech(chunk.payload).then((buffer) => {
|
|
160
|
+
clearProcessTimeout()
|
|
161
|
+
if (callbackCalled)
|
|
162
|
+
return
|
|
163
|
+
callbackCalled = true
|
|
155
164
|
if (self.closing) {
|
|
156
|
-
clearProcessTimeout()
|
|
157
165
|
callback(new Error("stream destroyed during processing"))
|
|
158
166
|
return
|
|
159
167
|
}
|
|
@@ -168,11 +176,13 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
168
176
|
chunkNew.type = "audio"
|
|
169
177
|
chunkNew.payload = buffer
|
|
170
178
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
171
|
-
clearProcessTimeout()
|
|
172
179
|
this.push(chunkNew)
|
|
173
180
|
callback()
|
|
174
181
|
}).catch((error: unknown) => {
|
|
175
182
|
clearProcessTimeout()
|
|
183
|
+
if (callbackCalled)
|
|
184
|
+
return
|
|
185
|
+
callbackCalled = true
|
|
176
186
|
callback(util.ensureError(error, "Kokoro processing failed"))
|
|
177
187
|
})
|
|
178
188
|
}
|
|
@@ -195,8 +205,10 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
195
205
|
}
|
|
196
206
|
|
|
197
207
|
/* destroy resampler */
|
|
198
|
-
if (this.resampler !== null)
|
|
208
|
+
if (this.resampler !== null) {
|
|
209
|
+
this.resampler.destroy()
|
|
199
210
|
this.resampler = null
|
|
211
|
+
}
|
|
200
212
|
|
|
201
213
|
/* destroy Kokoro API */
|
|
202
214
|
if (this.kokoro !== null)
|