speechflow 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{etc/claude.md → AGENTS.md} +8 -3
- package/CHANGELOG.md +70 -1
- package/README.md +28 -4
- package/etc/speechflow.yaml +3 -1
- package/etc/stx.conf +1 -1
- package/package.json +6 -6
- package/speechflow-cli/dst/speechflow-main-api.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-main-api.js +57 -16
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
- package/speechflow-cli/dst/speechflow-main-config.js +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +55 -21
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-nodes.js +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +6 -3
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +7 -10
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +8 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +9 -5
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -4
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +20 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +33 -11
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +4 -3
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +19 -11
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +33 -29
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +34 -20
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +13 -5
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +3 -2
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +33 -27
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +16 -5
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js +194 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +21 -9
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +21 -7
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +4 -2
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +34 -14
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +3 -2
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +18 -7
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +23 -11
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +13 -7
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +25 -12
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +32 -20
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +78 -62
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +63 -18
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.js +5 -7
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio-wt.js +31 -5
- package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +25 -14
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-error.js +2 -2
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.js +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +3 -2
- package/speechflow-cli/dst/speechflow-util-misc.js +63 -6
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +5 -17
- package/speechflow-cli/dst/speechflow-util-queue.js +57 -78
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.js +35 -8
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.js +1 -1
- package/speechflow-cli/dst/speechflow.d.ts +1 -1
- package/speechflow-cli/dst/speechflow.js +1 -1
- package/speechflow-cli/etc/eslint.mjs +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/etc/stx.conf +8 -2
- package/speechflow-cli/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +2 -1
- package/speechflow-cli/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-cli/package.d/kitten-tts-js+0.1.2.patch +24 -0
- package/speechflow-cli/package.d/speex-resampler+3.0.1.patch +56 -0
- package/speechflow-cli/package.json +40 -30
- package/speechflow-cli/src/lib.d.ts +19 -1
- package/speechflow-cli/src/speechflow-main-api.ts +64 -19
- package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
- package/speechflow-cli/src/speechflow-main-config.ts +1 -1
- package/speechflow-cli/src/speechflow-main-graph.ts +56 -22
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -1
- package/speechflow-cli/src/speechflow-main-status.ts +6 -3
- package/speechflow-cli/src/speechflow-main.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +7 -11
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -6
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +10 -5
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -4
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +43 -16
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +4 -3
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +24 -12
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +10 -9
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +38 -31
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +35 -22
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +17 -6
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +5 -4
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +39 -31
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +16 -5
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-kitten.ts +178 -0
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +21 -9
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +21 -7
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +4 -2
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +39 -15
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -3
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +1 -1
- package/speechflow-cli/src/speechflow-node-xio-device.ts +21 -7
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +25 -11
- package/speechflow-cli/src/speechflow-node-xio-file.ts +15 -7
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +28 -15
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +35 -22
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +85 -69
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +67 -20
- package/speechflow-cli/src/speechflow-node.ts +7 -8
- package/speechflow-cli/src/speechflow-util-audio-wt.ts +46 -7
- package/speechflow-cli/src/speechflow-util-audio.ts +27 -15
- package/speechflow-cli/src/speechflow-util-error.ts +3 -3
- package/speechflow-cli/src/speechflow-util-llm.ts +1 -1
- package/speechflow-cli/src/speechflow-util-misc.ts +63 -6
- package/speechflow-cli/src/speechflow-util-queue.ts +60 -81
- package/speechflow-cli/src/speechflow-util-stream.ts +40 -8
- package/speechflow-cli/src/speechflow-util.ts +1 -1
- package/speechflow-cli/src/speechflow.ts +1 -1
- package/speechflow-ui-db/dst/index.html +1 -1
- package/speechflow-ui-db/dst/index.js +15 -15
- package/speechflow-ui-db/etc/eslint.mjs +1 -1
- package/speechflow-ui-db/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-db/etc/stx.conf +1 -1
- package/speechflow-ui-db/etc/stylelint.js +1 -1
- package/speechflow-ui-db/etc/stylelint.yaml +1 -1
- package/speechflow-ui-db/etc/vite-client.mts +1 -1
- package/speechflow-ui-db/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-db/package.json +22 -16
- package/speechflow-ui-db/src/app.styl +1 -1
- package/speechflow-ui-db/src/app.vue +1 -1
- package/speechflow-ui-db/src/index.html +1 -1
- package/speechflow-ui-db/src/index.ts +1 -1
- package/speechflow-ui-st/dst/index.html +1 -1
- package/speechflow-ui-st/dst/index.js +31 -31
- package/speechflow-ui-st/etc/eslint.mjs +1 -1
- package/speechflow-ui-st/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-st/etc/stx.conf +1 -1
- package/speechflow-ui-st/etc/stylelint.js +1 -1
- package/speechflow-ui-st/etc/stylelint.yaml +1 -1
- package/speechflow-ui-st/etc/vite-client.mts +1 -1
- package/speechflow-ui-st/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-st/package.json +23 -17
- package/speechflow-ui-st/src/app.styl +1 -1
- package/speechflow-ui-st/src/app.vue +1 -1
- package/speechflow-ui-st/src/index.html +1 -1
- package/speechflow-ui-st/src/index.ts +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -25,7 +25,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
25
25
|
/* internal state */
|
|
26
26
|
private openai: OpenAI | null = null
|
|
27
27
|
private ws: ws.WebSocket | null = null
|
|
28
|
-
private queue: util.
|
|
28
|
+
private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
|
|
29
29
|
private resampler: SpeexResampler | null = null
|
|
30
30
|
private closing = false
|
|
31
31
|
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
@@ -67,7 +67,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
67
67
|
this.closing = false
|
|
68
68
|
|
|
69
69
|
/* create queue for results */
|
|
70
|
-
this.queue = new util.
|
|
70
|
+
this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
71
71
|
|
|
72
72
|
/* create a store for the meta information */
|
|
73
73
|
const metastore = new util.TimeStore<Map<string, any>>()
|
|
@@ -139,10 +139,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
139
139
|
})
|
|
140
140
|
|
|
141
141
|
/* hook onto session events */
|
|
142
|
-
this.ws.on("open", () => {
|
|
143
|
-
this.log("info", "WebSocket connection opened")
|
|
144
|
-
sendMessage({ type: "transcription.create" })
|
|
145
|
-
})
|
|
146
142
|
this.ws.on("close", () => {
|
|
147
143
|
this.log("info", "WebSocket connection closed")
|
|
148
144
|
if (!this.closing && this.queue !== null)
|
|
@@ -167,8 +163,11 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
167
163
|
}, new Map<string, any>())
|
|
168
164
|
}
|
|
169
165
|
|
|
170
|
-
/*
|
|
171
|
-
|
|
166
|
+
/* remember opening time to receive time zero offset */
|
|
167
|
+
this.timeOpen = DateTime.now()
|
|
168
|
+
|
|
169
|
+
/* track transcription text per item */
|
|
170
|
+
const textByItem = new Map<string, string>()
|
|
172
171
|
this.ws.on("message", (data) => {
|
|
173
172
|
let ev: Record<string, unknown>
|
|
174
173
|
try {
|
|
@@ -186,13 +185,16 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
186
185
|
case "transcription_session.created":
|
|
187
186
|
break
|
|
188
187
|
case "conversation.item.created": {
|
|
189
|
-
|
|
188
|
+
const itemId = (ev.item as Record<string, unknown>)?.id as string
|
|
189
|
+
if (itemId)
|
|
190
|
+
textByItem.set(itemId, "")
|
|
190
191
|
break
|
|
191
192
|
}
|
|
192
193
|
case "conversation.item.input_audio_transcription.delta": {
|
|
193
|
-
|
|
194
|
+
const itemId = ev.item_id as string
|
|
195
|
+
const text = (textByItem.get(itemId) ?? "") + (ev.delta as string)
|
|
196
|
+
textByItem.set(itemId, text)
|
|
194
197
|
if (this.params.interim && !this.closing && this.queue !== null) {
|
|
195
|
-
const itemId = ev.item_id as string
|
|
196
198
|
const timing = speechTiming.get(itemId)
|
|
197
199
|
const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
|
|
198
200
|
const end = timing !== undefined ? Duration.fromMillis(timing.endMs) : start
|
|
@@ -204,7 +206,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
204
206
|
}
|
|
205
207
|
case "conversation.item.input_audio_transcription.completed": {
|
|
206
208
|
if (!this.closing && this.queue !== null) {
|
|
207
|
-
text
|
|
209
|
+
const text = ev.transcript as string
|
|
208
210
|
const itemId = ev.item_id as string
|
|
209
211
|
const timing = speechTiming.get(itemId)
|
|
210
212
|
const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
|
|
@@ -213,8 +215,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
213
215
|
chunk.meta = aggregateMeta(start, end)
|
|
214
216
|
metastore.prune(start)
|
|
215
217
|
speechTiming.delete(itemId)
|
|
218
|
+
textByItem.delete(itemId)
|
|
216
219
|
this.queue.write(chunk)
|
|
217
|
-
text = ""
|
|
218
220
|
}
|
|
219
221
|
break
|
|
220
222
|
}
|
|
@@ -248,9 +250,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
248
250
|
}
|
|
249
251
|
})
|
|
250
252
|
|
|
251
|
-
/* remember opening time to receive time zero offset */
|
|
252
|
-
this.timeOpen = DateTime.now()
|
|
253
|
-
|
|
254
253
|
/* provide Duplex stream and internally attach to OpenAI API */
|
|
255
254
|
const self = this
|
|
256
255
|
const reads = new util.PromiseSet<void>()
|
|
@@ -260,7 +259,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
260
259
|
decodeStrings: false,
|
|
261
260
|
highWaterMark: 1,
|
|
262
261
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
263
|
-
if (self.closing || self.ws === null) {
|
|
262
|
+
if (self.closing || self.ws === null || self.resampler === null) {
|
|
264
263
|
callback(new Error("stream already destroyed"))
|
|
265
264
|
return
|
|
266
265
|
}
|
|
@@ -274,7 +273,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
274
273
|
if (chunk.meta.size > 0)
|
|
275
274
|
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
276
275
|
try {
|
|
277
|
-
const payload = self.resampler
|
|
276
|
+
const payload = self.resampler.processChunk(chunk.payload)
|
|
278
277
|
const audioB64 = payload.toString("base64")
|
|
279
278
|
sendMessage({
|
|
280
279
|
type: "input_audio_buffer.append",
|
|
@@ -296,17 +295,23 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
296
295
|
}
|
|
297
296
|
try {
|
|
298
297
|
sendMessage({ type: "input_audio_buffer.commit" })
|
|
299
|
-
self.ws
|
|
300
|
-
await
|
|
298
|
+
self.ws?.close()
|
|
299
|
+
await new Promise<void>((resolve) => {
|
|
300
|
+
const timeout = setTimeout(() => { resolve() }, 5000)
|
|
301
|
+
self.ws?.once("close", () => {
|
|
302
|
+
clearTimeout(timeout)
|
|
303
|
+
resolve()
|
|
304
|
+
})
|
|
305
|
+
})
|
|
301
306
|
}
|
|
302
307
|
catch (error) {
|
|
303
308
|
self.log("warning", `error closing OpenAI connection: ${error}`)
|
|
304
309
|
}
|
|
310
|
+
|
|
311
|
+
/* await all read operations */
|
|
305
312
|
await reads.awaitAll()
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
this.push(chunk)
|
|
309
|
-
this.push(null)
|
|
313
|
+
|
|
314
|
+
/* NOTICE: do not push null here -- let the WebSocket close event handle it */
|
|
310
315
|
callback()
|
|
311
316
|
},
|
|
312
317
|
read (size) {
|
|
@@ -346,6 +351,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
346
351
|
this.connectionTimeout = null
|
|
347
352
|
}
|
|
348
353
|
|
|
354
|
+
/* shutdown stream */
|
|
355
|
+
if (this.stream !== null) {
|
|
356
|
+
await util.destroyStream(this.stream)
|
|
357
|
+
this.stream = null
|
|
358
|
+
}
|
|
359
|
+
|
|
349
360
|
/* signal EOF to any pending read operations */
|
|
350
361
|
if (this.queue !== null) {
|
|
351
362
|
this.queue.write(null)
|
|
@@ -362,12 +373,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
362
373
|
this.openai = null
|
|
363
374
|
|
|
364
375
|
/* close resampler */
|
|
365
|
-
this.resampler
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
if (this.stream !== null) {
|
|
369
|
-
await util.destroyStream(this.stream)
|
|
370
|
-
this.stream = null
|
|
376
|
+
if (this.resampler !== null) {
|
|
377
|
+
this.resampler.destroy()
|
|
378
|
+
this.resampler = null
|
|
371
379
|
}
|
|
372
380
|
}
|
|
373
381
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -131,9 +131,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
131
131
|
else if (chunk.payload === "")
|
|
132
132
|
callback()
|
|
133
133
|
else {
|
|
134
|
+
let callbackCalled = false
|
|
134
135
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
135
136
|
processTimeout = null
|
|
136
|
-
|
|
137
|
+
if (!callbackCalled) {
|
|
138
|
+
callbackCalled = true
|
|
139
|
+
callback(new Error("AWS Polly API timeout"))
|
|
140
|
+
}
|
|
137
141
|
}, 60 * 1000)
|
|
138
142
|
const clearProcessTimeout = () => {
|
|
139
143
|
if (processTimeout !== null) {
|
|
@@ -143,8 +147,11 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
143
147
|
}
|
|
144
148
|
self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
|
|
145
149
|
textToSpeech(chunk.payload as string).then((buffer) => {
|
|
150
|
+
clearProcessTimeout()
|
|
151
|
+
if (callbackCalled)
|
|
152
|
+
return
|
|
153
|
+
callbackCalled = true
|
|
146
154
|
if (self.closing) {
|
|
147
|
-
clearProcessTimeout()
|
|
148
155
|
callback(new Error("stream destroyed during processing"))
|
|
149
156
|
return
|
|
150
157
|
}
|
|
@@ -157,11 +164,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
157
164
|
chunkNew.type = "audio"
|
|
158
165
|
chunkNew.payload = buffer
|
|
159
166
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
160
|
-
clearProcessTimeout()
|
|
161
167
|
this.push(chunkNew)
|
|
162
168
|
callback()
|
|
163
169
|
}).catch((error: unknown) => {
|
|
164
170
|
clearProcessTimeout()
|
|
171
|
+
if (callbackCalled)
|
|
172
|
+
return
|
|
173
|
+
callbackCalled = true
|
|
165
174
|
callback(util.ensureError(error, "AWS Polly processing failed"))
|
|
166
175
|
})
|
|
167
176
|
}
|
|
@@ -184,8 +193,10 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
184
193
|
}
|
|
185
194
|
|
|
186
195
|
/* destroy resampler */
|
|
187
|
-
if (this.resampler !== null)
|
|
196
|
+
if (this.resampler !== null) {
|
|
197
|
+
this.resampler.destroy()
|
|
188
198
|
this.resampler = null
|
|
199
|
+
}
|
|
189
200
|
|
|
190
201
|
/* destroy AWS Polly API */
|
|
191
202
|
if (this.client !== null) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -150,9 +150,13 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
150
150
|
else if (chunk.payload === "")
|
|
151
151
|
callback()
|
|
152
152
|
else {
|
|
153
|
+
let callbackCalled = false
|
|
153
154
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
154
155
|
processTimeout = null
|
|
155
|
-
|
|
156
|
+
if (!callbackCalled) {
|
|
157
|
+
callbackCalled = true
|
|
158
|
+
callback(new Error("ElevenLabs API timeout"))
|
|
159
|
+
}
|
|
156
160
|
}, 60 * 1000)
|
|
157
161
|
const clearProcessTimeout = () => {
|
|
158
162
|
if (processTimeout !== null) {
|
|
@@ -163,13 +167,17 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
163
167
|
try {
|
|
164
168
|
if (self.closing) {
|
|
165
169
|
clearProcessTimeout()
|
|
170
|
+
callbackCalled = true
|
|
166
171
|
callback(new Error("stream destroyed during processing"))
|
|
167
172
|
return
|
|
168
173
|
}
|
|
169
174
|
const stream = await speechStream(chunk.payload as string)
|
|
170
175
|
const buffer = await getStreamAsBuffer(stream)
|
|
176
|
+
clearProcessTimeout()
|
|
177
|
+
if (callbackCalled)
|
|
178
|
+
return
|
|
179
|
+
callbackCalled = true
|
|
171
180
|
if (self.closing) {
|
|
172
|
-
clearProcessTimeout()
|
|
173
181
|
callback(new Error("stream destroyed during processing"))
|
|
174
182
|
return
|
|
175
183
|
}
|
|
@@ -187,12 +195,14 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
187
195
|
chunkNew.type = "audio"
|
|
188
196
|
chunkNew.payload = bufferResampled
|
|
189
197
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
190
|
-
clearProcessTimeout()
|
|
191
198
|
this.push(chunkNew)
|
|
192
199
|
callback()
|
|
193
200
|
}
|
|
194
201
|
catch (error) {
|
|
195
202
|
clearProcessTimeout()
|
|
203
|
+
if (callbackCalled)
|
|
204
|
+
return
|
|
205
|
+
callbackCalled = true
|
|
196
206
|
callback(util.ensureError(error, "ElevenLabs processing failed"))
|
|
197
207
|
}
|
|
198
208
|
}
|
|
@@ -215,8 +225,10 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
215
225
|
}
|
|
216
226
|
|
|
217
227
|
/* destroy resampler */
|
|
218
|
-
if (this.resampler !== null)
|
|
228
|
+
if (this.resampler !== null) {
|
|
229
|
+
this.resampler.destroy()
|
|
219
230
|
this.resampler = null
|
|
231
|
+
}
|
|
220
232
|
|
|
221
233
|
/* destroy ElevenLabs API */
|
|
222
234
|
if (this.elevenlabs !== null)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -129,9 +129,13 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
129
129
|
else if (chunk.payload === "")
|
|
130
130
|
callback()
|
|
131
131
|
else {
|
|
132
|
+
let callbackCalled = false
|
|
132
133
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
133
134
|
processTimeout = null
|
|
134
|
-
|
|
135
|
+
if (!callbackCalled) {
|
|
136
|
+
callbackCalled = true
|
|
137
|
+
callback(new Error("Google TTS API timeout"))
|
|
138
|
+
}
|
|
135
139
|
}, 60 * 1000)
|
|
136
140
|
const clearProcessTimeout = () => {
|
|
137
141
|
if (processTimeout !== null) {
|
|
@@ -142,12 +146,16 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
142
146
|
try {
|
|
143
147
|
if (self.closing) {
|
|
144
148
|
clearProcessTimeout()
|
|
149
|
+
callbackCalled = true
|
|
145
150
|
callback(new Error("stream destroyed during processing"))
|
|
146
151
|
return
|
|
147
152
|
}
|
|
148
153
|
const buffer = await textToSpeech(chunk.payload as string)
|
|
154
|
+
clearProcessTimeout()
|
|
155
|
+
if (callbackCalled)
|
|
156
|
+
return
|
|
157
|
+
callbackCalled = true
|
|
149
158
|
if (self.closing) {
|
|
150
|
-
clearProcessTimeout()
|
|
151
159
|
callback(new Error("stream destroyed during processing"))
|
|
152
160
|
return
|
|
153
161
|
}
|
|
@@ -161,12 +169,14 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
161
169
|
chunkNew.type = "audio"
|
|
162
170
|
chunkNew.payload = buffer
|
|
163
171
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
164
|
-
clearProcessTimeout()
|
|
165
172
|
this.push(chunkNew)
|
|
166
173
|
callback()
|
|
167
174
|
}
|
|
168
175
|
catch (error) {
|
|
169
176
|
clearProcessTimeout()
|
|
177
|
+
if (callbackCalled)
|
|
178
|
+
return
|
|
179
|
+
callbackCalled = true
|
|
170
180
|
callback(util.ensureError(error, "Google TTS processing failed"))
|
|
171
181
|
}
|
|
172
182
|
}
|
|
@@ -189,8 +199,10 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
189
199
|
}
|
|
190
200
|
|
|
191
201
|
/* destroy resampler */
|
|
192
|
-
if (this.resampler !== null)
|
|
202
|
+
if (this.resampler !== null) {
|
|
203
|
+
this.resampler.destroy()
|
|
193
204
|
this.resampler = null
|
|
205
|
+
}
|
|
194
206
|
|
|
195
207
|
/* destroy Google TTS client */
|
|
196
208
|
if (this.client !== null) {
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import Stream from "node:stream"
|
|
9
|
+
|
|
10
|
+
/* external dependencies */
|
|
11
|
+
import { KittenTTS } from "kitten-tts-js"
|
|
12
|
+
import { Duration } from "luxon"
|
|
13
|
+
import SpeexResampler from "speex-resampler"
|
|
14
|
+
|
|
15
|
+
/* internal dependencies */
|
|
16
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
17
|
+
import * as util from "./speechflow-util"
|
|
18
|
+
|
|
19
|
+
/*  SpeechFlow node for Kitten text-to-speech conversion.
    Accepts text chunks and emits PCM/I16 audio chunks at the graph's
    standard sample rate, synthesized locally via the Kitten TTS model.  */
export default class SpeechFlowNodeT2AKitten extends SpeechFlowNode {
    /* declare official node name */
    public static name = "t2a-kitten"

    /* internal state */
    private kitten: KittenTTS | null = null          /* Kitten TTS engine instance       */
    private resampler: SpeexResampler | null = null  /* 24KHz -> configured-rate resampler */
    private closing = false                          /* set by close() to reject late work */

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters:
           - model: Kitten TTS model identifier (any non-empty string)
           - voice: one of the eight voices Kitten TTS ships with
           - speed: speaking-rate multiplier, clamped to [0.5, 2.0] */
        this.configure({
            model: { type: "string", val: "KittenML/kitten-tts-nano-0.8", pos: 0, match: /^.+$/ },
            voice: { type: "string", val: "Bruno", pos: 1, match: /^(?:Bella|Jasper|Luna|Bruno|Rosie|Hugo|Kiki|Leo)$/ },
            speed: { type: "number", val: 1.25, pos: 2, match: (n: number) => n >= 0.5 && n <= 2.0 }
        })

        /* declare node input/output format: text in, audio out */
        this.input = "text"
        this.output = "audio"
    }

    /* one-time status of node (no status information to report) */
    async status () {
        return {}
    }

    /* open node */
    async open () {
        /* clear destruction flag (node may be re-opened after close) */
        this.closing = false

        /* establish Kitten TTS */
        this.kitten = await KittenTTS.from_pretrained(this.params.model)
        if (this.kitten === null)
            throw new Error("failed to instantiate Kitten TTS")

        /* establish resampler from Kitten's 24Khz
           output to our standard audio sample rate (48KHz) */
        this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)

        /* perform text-to-speech operation with Kitten TTS API:
           text -> PCM/F32/24KHz -> PCM/I16/24KHz -> PCM/I16 at configured rate */
        const text2speech = async (text: string) => {
            this.log("info", `Kitten TTS: input: "${text}"`)
            const audio = await this.kitten!.generate(text, {
                voice: this.params.voice,
                speed: this.params.speed
            })
            if (audio.sampling_rate !== 24000)
                throw new Error("expected 24KHz sampling rate in Kitten TTS output")

            /* convert audio samples from PCM/F32/24Khz to PCM/I16/24KHz
               (each float sample is clamped to [-1, 1] and scaled to 16-bit range;
               NOTE(review): sample * 0x7FFF is truncated, not rounded, when written
               — consider Math.round for slightly lower quantization error) */
            const samples = audio.data
            const buffer1 = Buffer.alloc(samples.length * 2)
            for (let i = 0; i < samples.length; i++) {
                const sample = Math.max(-1, Math.min(1, samples[i]))
                buffer1.writeInt16LE(sample * 0x7FFF, i * 2)
            }

            /* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz
               (guard: close() may have destroyed the resampler meanwhile) */
            if (this.resampler === null)
                throw new Error("resampler already destroyed")
            return this.resampler.processChunk(buffer1)
        }

        /* create transform stream and connect it to the Kitten TTS API */
        const self = this
        this.stream = new Stream.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            highWaterMark: 1,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                if (self.closing)
                    callback(new Error("stream already destroyed"))
                else if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else if (chunk.payload === "")
                    callback()  /* nothing to synthesize for empty text */
                else {
                    /* guard against calling the stream callback more than once:
                       the 60s watchdog timeout, the success path and the error
                       path below can each fire, but only the first one wins */
                    let callbackCalled = false
                    let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
                        processTimeout = null
                        if (!callbackCalled) {
                            callbackCalled = true
                            callback(new Error("Kitten TTS timeout"))
                        }
                    }, 60 * 1000)
                    const clearProcessTimeout = () => {
                        if (processTimeout !== null) {
                            clearTimeout(processTimeout)
                            processTimeout = null
                        }
                    }
                    text2speech(chunk.payload).then((buffer) => {
                        clearProcessTimeout()
                        if (callbackCalled)
                            return  /* watchdog already reported a timeout */
                        callbackCalled = true
                        if (self.closing) {
                            callback(new Error("stream destroyed during processing"))
                            return
                        }
                        self.log("info", `Kitten TTS: received audio (buffer length: ${buffer.byteLength})`)

                        /* calculate actual audio duration from PCM buffer size */
                        const durationMs = util.audioBufferDuration(buffer,
                            self.config.audioSampleRate, self.config.audioBitDepth) * 1000

                        /* create new chunk with recalculated timestamps
                           (end = start + synthesized audio duration) */
                        const chunkNew = chunk.clone()
                        chunkNew.type = "audio"
                        chunkNew.payload = buffer
                        chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
                        this.push(chunkNew)
                        callback()
                    }).catch((error: unknown) => {
                        clearProcessTimeout()
                        if (callbackCalled)
                            return  /* watchdog already reported a timeout */
                        callbackCalled = true
                        callback(util.ensureError(error, "Kitten TTS processing failed"))
                    })
                }
            },
            final (callback) {
                callback()  /* no buffered state to flush */
            }
        })
    }

    /* close node */
    async close () {
        /* indicate closing (makes in-flight transform callbacks bail out) */
        this.closing = true

        /* shutdown stream */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }

        /* destroy resampler */
        if (this.resampler !== null) {
            this.resampler.destroy()
            this.resampler = null
        }

        /* destroy Kitten TTS API */
        if (this.kitten !== null) {
            await this.kitten.release()
            this.kitten = null
        }
    }
}
|
|
178
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -81,11 +81,12 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
81
81
|
this.kokoro = await KokoroTTS.from_pretrained(model, {
|
|
82
82
|
dtype: "q4f16",
|
|
83
83
|
progress_callback: progressCallback
|
|
84
|
+
}).finally(() => {
|
|
85
|
+
if (interval !== null) {
|
|
86
|
+
clearInterval(interval)
|
|
87
|
+
interval = null
|
|
88
|
+
}
|
|
84
89
|
})
|
|
85
|
-
if (interval !== null) {
|
|
86
|
-
clearInterval(interval)
|
|
87
|
-
interval = null
|
|
88
|
-
}
|
|
89
90
|
if (this.kokoro === null)
|
|
90
91
|
throw new Error("failed to instantiate Kokoro")
|
|
91
92
|
|
|
@@ -141,9 +142,13 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
141
142
|
else if (chunk.payload === "")
|
|
142
143
|
callback()
|
|
143
144
|
else {
|
|
145
|
+
let callbackCalled = false
|
|
144
146
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
145
147
|
processTimeout = null
|
|
146
|
-
|
|
148
|
+
if (!callbackCalled) {
|
|
149
|
+
callbackCalled = true
|
|
150
|
+
callback(new Error("Kokoro TTS timeout"))
|
|
151
|
+
}
|
|
147
152
|
}, 60 * 1000)
|
|
148
153
|
const clearProcessTimeout = () => {
|
|
149
154
|
if (processTimeout !== null) {
|
|
@@ -152,8 +157,11 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
152
157
|
}
|
|
153
158
|
}
|
|
154
159
|
text2speech(chunk.payload).then((buffer) => {
|
|
160
|
+
clearProcessTimeout()
|
|
161
|
+
if (callbackCalled)
|
|
162
|
+
return
|
|
163
|
+
callbackCalled = true
|
|
155
164
|
if (self.closing) {
|
|
156
|
-
clearProcessTimeout()
|
|
157
165
|
callback(new Error("stream destroyed during processing"))
|
|
158
166
|
return
|
|
159
167
|
}
|
|
@@ -168,11 +176,13 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
168
176
|
chunkNew.type = "audio"
|
|
169
177
|
chunkNew.payload = buffer
|
|
170
178
|
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
171
|
-
clearProcessTimeout()
|
|
172
179
|
this.push(chunkNew)
|
|
173
180
|
callback()
|
|
174
181
|
}).catch((error: unknown) => {
|
|
175
182
|
clearProcessTimeout()
|
|
183
|
+
if (callbackCalled)
|
|
184
|
+
return
|
|
185
|
+
callbackCalled = true
|
|
176
186
|
callback(util.ensureError(error, "Kokoro processing failed"))
|
|
177
187
|
})
|
|
178
188
|
}
|
|
@@ -195,8 +205,10 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
195
205
|
}
|
|
196
206
|
|
|
197
207
|
/* destroy resampler */
|
|
198
|
-
if (this.resampler !== null)
|
|
208
|
+
if (this.resampler !== null) {
|
|
209
|
+
this.resampler.destroy()
|
|
199
210
|
this.resampler = null
|
|
211
|
+
}
|
|
200
212
|
|
|
201
213
|
/* destroy Kokoro API */
|
|
202
214
|
if (this.kokoro !== null)
|