npm - speechflow - Versions diffs - 2.2.1 → 2.3.1 - Mend

speechflow 2.2.1 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (242)

package/speechflow-cli/src/speechflow-node-a2a-vad.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -258,6 +258,9 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         return
                     }
+                    /*  await forthcoming audio chunks (forward declaration)  */
+                    let awaitForthcomingChunks: () => void = () => {}
                     /*  flush pending audio chunks  */
                     const flushPendingChunks = () => {
                         let pushed = 0
@@ -289,22 +292,22 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                                 this.push(chunk)
                                 pushed++
                             }
-                            else if (self.params.mode === "unplugged" && pushed === 0) {
-                                /*  we have to await chunks now, as in unplugged
-                                    mode we else would be never called again until
-                                    we at least once push a new chunk as the result  */
-                                setTimeout(() => {
-                                    if (self.closing || self.queue === null)
-                                        return
-                                    tryToRead()
-                                }, 0)
-                                return
-                            }
+                        }
+                        /*  in unplugged mode, if no chunk was pushed (all were
+                            non-speech), we need to wait event-driven for new
+                            data, as the stream won't call read() again until
+                            we push something  */
+                        if (pushed === 0
+                            && !self.closing
+                            && !self.activeEventListeners.has(awaitForthcomingChunks)) {
+                            self.queue.once("write", awaitForthcomingChunks)
+                            self.activeEventListeners.add(awaitForthcomingChunks)
                         }
                     }
                     /*  await forthcoming audio chunks  */
-                    const awaitForthcomingChunks = () => {
+                    awaitForthcomingChunks = () => {
                         self.activeEventListeners.delete(awaitForthcomingChunks)
                         if (self.closing)
                             return
@@ -339,16 +342,28 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
     /*  close node  */
     async close () {
-        /*  indicate closing  */
-        this.closing = true
         /*  cleanup tail timer  */
         if (this.tailTimer !== null) {
             clearTimeout(this.tailTimer)
             this.tailTimer = null
         }
-        /*  remove all event listeners  */
+        /*  flush VAD (before closing, as flush triggers callbacks which need active state)  */
+        if (this.vad !== null) {
+            try {
+                const flushPromise = this.vad.flush()
+                const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
+                await Promise.race([ flushPromise, timeoutPromise ])
+            }
+            catch (error) {
+                this.log("warning", `VAD flush error during close: ${error}`)
+            }
+        }
+        /*  indicate closing  */
+        this.closing = true
+        /*  remove all remaining event listeners  */
         this.activeEventListeners.forEach((listener) => {
             this.queue.removeListener("write", listener)
         })
@@ -360,23 +375,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
             this.stream = null
         }
-        /*  cleanup queue pointers before closing VAD to prevent callback access  */
-        this.queue.pointerDelete("recv")
-        this.queue.pointerDelete("vad")
-        this.queue.pointerDelete("send")
-        /*  close VAD  */
+        /*  destroy VAD  */
         if (this.vad !== null) {
-            try {
-                const flushPromise = this.vad.flush()
-                const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
-                await Promise.race([ flushPromise, timeoutPromise ])
-            }
-            catch (error) {
-                this.log("warning", `VAD flush error during close: ${error}`)
-            }
             this.vad.destroy()
             this.vad = null
         }
+        /*  cleanup queue pointers  */
+        this.queue.pointerDelete("recv")
+        this.queue.pointerDelete("vad")
+        this.queue.pointerDelete("send")
     }
 }

package/speechflow-cli/src/speechflow-node-a2a-wav.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -183,9 +183,10 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
                             callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
                             return
                         }
-                        chunk.payload = chunk.payload.subarray(44)
-                        this.push(chunk)
-                        totalSize += chunk.payload.byteLength
+                        const chunkNew = chunk.clone()
+                        chunkNew.payload = chunk.payload.subarray(44)
+                        this.push(chunkNew)
+                        totalSize += chunkNew.payload.byteLength
                         callback()
                     }
                     else {
@@ -210,7 +211,7 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
                         sampleRate:  self.config.audioSampleRate,
                         bitDepth:    self.config.audioBitDepth
                     })
-                    const headerChunk = headerChunkSent?.clone()
+                    const headerChunk = headerChunkSent.clone()
                     headerChunk.payload = headerBuffer
                     headerChunk.meta.set("chunk:seek", 0)
                     this.push(headerChunk)

package/speechflow-cli/src/speechflow-node-a2t-amazon.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -42,6 +42,7 @@ class AsyncQueue<T> {
             resolve?.({ value: null, done: true })
         }
         this.queue.length = 0
+        this.queue.push(null)
     }
     async * [Symbol.asyncIterator] (): AsyncIterator<T> {
         while (true) {
@@ -71,8 +72,9 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
     private client:       TranscribeStreamingClient                | null = null
     private clientStream: AsyncIterable<TranscriptResultStream>    | null = null
     private audioQueue:   AsyncQueue<Uint8Array>                   | null = null
+    private queue:        util.AsyncQueue<SpeechFlowChunk | null>  | null = null
+    private clientStreamStarting                                          = false
     private closing                                                       = false
-    private queue:        util.SingleQueue<SpeechFlowChunk | null> | null = null
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -110,10 +112,11 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
             throw new Error("Amazon Transcribe node currently supports PCM-S16LE audio only")
         /*  clear destruction flag  */
-        this.closing = false
+        this.closing             = false
+        this.clientStreamStarting = false
         /*  create queue for results  */
-        this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+        this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
         /*  create a store for the meta information  */
         const metastore = new util.TimeStore<Map<string, any>>()
@@ -136,26 +139,36 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
             }
         })(audioQueue)
+        /*  provide a self-reference for use in callbacks below  */
+        const self = this
         /*  start streaming  */
         const ensureAudioStreamActive = async () => {
-            if (this.clientStream !== null || this.closing)
+            if (this.clientStream !== null || this.clientStreamStarting || this.closing)
                 return
-            const language: LanguageCode = this.params.language === "de" ? "de-DE" : "en-US"
-            const command = new StartStreamTranscriptionCommand({
-                LanguageCode: language,
-                EnablePartialResultsStabilization: this.params.interim,
-                ...(this.params.interim ? { PartialResultsStability: "low" } : {}),
-                MediaEncoding: "pcm",
-                MediaSampleRateHertz: this.config.audioSampleRate,
-                AudioStream: audioStream,
-            })
-            const response = await this.client!.send(command)
-            const stream = response.TranscriptResultStream
-            if (!stream)
-                throw new Error("no TranscriptResultStream returned")
-            this.clientStream = stream
+            this.clientStreamStarting = true
+            try {
+                const language: LanguageCode = this.params.language === "de" ? "de-DE" : "en-US"
+                const command = new StartStreamTranscriptionCommand({
+                    LanguageCode: language,
+                    EnablePartialResultsStabilization: this.params.interim,
+                    ...(this.params.interim ? { PartialResultsStability: "low" } : {}),
+                    MediaEncoding: "pcm",
+                    MediaSampleRateHertz: this.config.audioSampleRate,
+                    AudioStream: audioStream,
+                })
+                const response = await this.client!.send(command)
+                const stream = response.TranscriptResultStream
+                if (!stream)
+                    throw new Error("no TranscriptResultStream returned")
+                this.clientStream = stream
+            }
+            catch (err) {
+                this.clientStreamStarting = false
+                throw err
+            }
             ;(async () => {
-                for await (const event of stream) {
+                for await (const event of this.clientStream!) {
                     const te = event.TranscriptEvent
                     if (!te?.Transcript?.Results)
                         continue
@@ -192,8 +205,11 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                         this.queue?.write(chunk)
                     }
                 }
+                self.queue?.write(null)
             })().catch((err: unknown) => {
                 this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${util.ensureError(err).message}`)
+                this.clientStream         = null
+                this.clientStreamStarting = false
             })
         }
@@ -201,7 +217,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
         this.timeOpen = DateTime.now()
         /*  provide Duplex stream and internally attach to Amazon Transcribe API  */
-        const self = this
         const reads = new util.PromiseSet<void>()
         this.stream = new Stream.Duplex({
             writableObjectMode: true,
@@ -235,12 +250,18 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                     callback()
                     return
                 }
-                await reads.awaitAll()
+                /*  signal end-of-audio to Amazon Transcribe first  */
+                audioQueue.push(null)
+                /*  await all pending read operations (with safety timeout)  */
+                await reads.awaitAll(5000)
+                /*  clean up Amazon Transcribe connection and audio queue  */
                 util.run("closing Amazon Transcribe connection",
                     () => self.client!.destroy(),
                     (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
                 )
-                audioQueue.push(null) /*  do not push null to stream, let Amazon Transcribe do it  */
                 audioQueue.destroy()
                 callback()
             },
@@ -259,7 +280,7 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                         this.push(null)
                     }
                     else {
-                        self.log("debug", `received data (${chunk.payload.length} bytes): "${chunk.payload}"`)
+                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
                         this.push(chunk)
                     }
                 }).catch((error: unknown) => {
@@ -273,7 +294,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
     /*  close node  */
     async close () {
         /*  indicate closing first to stop all async operations  */
-        this.closing = true
+        this.closing              = true
+        this.clientStreamStarting = false
         /*  shutdown stream  */
         if (this.stream !== null) {

package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -24,7 +24,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
     private dg:                Deepgram.LiveClient                      | null = null
     private closing                                                            = false
     private connectionTimeout: ReturnType<typeof setTimeout>            | null = null
-    private queue:             util.SingleQueue<SpeechFlowChunk | null> | null = null
+    private queue:             util.AsyncQueue<SpeechFlowChunk | null>  | null = null
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -64,7 +64,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                         balance += balanceResponse.result.balances[0]?.amount ?? 0
                 }
             }
-            else if (response?.error !== null)
+            else if (response !== null && response.error !== null)
                 this.log("warning", `API error fetching projects: ${response.error}`)
         }
         catch (error) {
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
         this.closing = false
         /*  create queue for results  */
-        this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+        this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
         /*  create a store for the meta information  */
         const metastore = new util.TimeStore<Map<string, any>>()
@@ -145,7 +145,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                 { word: string, punctuated_word?: string, start: number, end: number }[]
             const isFinal     = (data.is_final     as boolean) ?? false
             const speechFinal = (data.speech_final as boolean) ?? false
-            const kind = ((interim && isFinal) || (endpointing > 0 && speechFinal)) ? "final" : "intermediate"
+            const kind = (isFinal || (endpointing > 0 && speechFinal)) ? "final" : "intermediate"
             if (text === "")
                 this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
             else {
@@ -206,6 +206,13 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                 }
                 resolve(true)
             })
+            this.dg!.once(Deepgram.LiveTranscriptionEvents.Error, (err: Error) => {
+                if (this.connectionTimeout !== null) {
+                    clearTimeout(this.connectionTimeout)
+                    this.connectionTimeout = null
+                }
+                reject(err)
+            })
         })
         /*  remember opening time to receive time zero offset  */
@@ -234,7 +241,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                         if (chunk.meta.size > 0)
                             metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
                         try {
-                            self.dg.send(chunk.payload.buffer) /* intentionally discard all time information */
+                            /*  send buffer (and intentionally discard all time information)  */
+                            self.dg.send(chunk.payload.buffer.slice(
+                                chunk.payload.byteOffset,
+                                chunk.payload.byteOffset + chunk.payload.byteLength
+                            ))
                         }
                         catch (error) {
                             callback(util.ensureError(error, "failed to send to Deepgram"))

package/speechflow-cli/src/speechflow-node-a2t-google.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -24,8 +24,9 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
     /*  internal state  */
     private client:          GoogleSpeech.SpeechClient                                   | null = null
     private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
-    private queue:           util.SingleQueue<SpeechFlowChunk | null>                    | null = null
+    private queue:           util.AsyncQueue<SpeechFlowChunk | null>                     | null = null
     private closing                                                                             = false
+    private lastResultEndMs                                                                     = 0
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -62,8 +63,11 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
         /*  clear destruction flag  */
         this.closing = false
+        /*  reset result end time tracking  */
+        this.lastResultEndMs = 0
         /*  create queue for results  */
-        this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+        this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
         /*  create a store for the meta information  */
         const metastore = new util.TimeStore<Map<string, any>>()
@@ -152,12 +156,16 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
                     /*  fallback: use result timing  */
                     const resultEnd = result.resultEndTime
                     if (resultEnd) {
-                        tsEnd = Duration.fromMillis(
+                        tsStart = Duration.fromMillis(this.lastResultEndMs).plus(this.timeZeroOffset)
+                        tsEnd   = Duration.fromMillis(
                             (Number(resultEnd.seconds ?? 0) * 1000) +
                             (Number(resultEnd.nanos ?? 0) / 1000000)
                         ).plus(this.timeZeroOffset)
                     }
                 }
+                /*  track raw end time for next fallback estimation  */
+                this.lastResultEndMs = tsEnd.minus(this.timeZeroOffset).toMillis()
                 this.log("info", `text received (start: ${tsStart.toMillis()}ms, ` +
                     `end: ${tsEnd.toMillis()}ms, ` +
                     `kind: ${isFinal ? "final" : "intermediate"}): ` +

package/speechflow-cli/src/speechflow-node-a2t-openai.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -25,7 +25,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
     /*  internal state  */
     private openai:            OpenAI                                    | null = null
     private ws:                ws.WebSocket                              | null = null
-    private queue:             util.SingleQueue<SpeechFlowChunk | null>  | null = null
+    private queue:             util.AsyncQueue<SpeechFlowChunk | null>   | null = null
     private resampler:         SpeexResampler                            | null = null
     private closing                                                             = false
     private connectionTimeout: ReturnType<typeof setTimeout>             | null = null
@@ -67,7 +67,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
         this.closing = false
         /*  create queue for results  */
-        this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+        this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
         /*  create a store for the meta information  */
         const metastore = new util.TimeStore<Map<string, any>>()
@@ -139,10 +139,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
         })
         /*  hook onto session events  */
-        this.ws.on("open", () => {
-            this.log("info", "WebSocket connection opened")
-            sendMessage({ type: "transcription.create" })
-        })
         this.ws.on("close", () => {
             this.log("info", "WebSocket connection closed")
             if (!this.closing && this.queue !== null)
@@ -167,8 +163,11 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
             }, new Map<string, any>())
         }
-        /*  track transcription text  */
-        let text = ""
+        /*  remember opening time to receive time zero offset  */
+        this.timeOpen = DateTime.now()
+        /*  track transcription text per item  */
+        const textByItem = new Map<string, string>()
         this.ws.on("message", (data) => {
             let ev: Record<string, unknown>
             try {
@@ -186,13 +185,16 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                 case "transcription_session.created":
                     break
                 case "conversation.item.created": {
-                    text = ""
+                    const itemId = (ev.item as Record<string, unknown>)?.id as string
+                    if (itemId)
+                        textByItem.set(itemId, "")
                     break
                 }
                 case "conversation.item.input_audio_transcription.delta": {
-                    text += ev.delta as string
+                    const itemId = ev.item_id as string
+                    const text   = (textByItem.get(itemId) ?? "") + (ev.delta as string)
+                    textByItem.set(itemId, text)
                     if (this.params.interim && !this.closing && this.queue !== null) {
-                        const itemId = ev.item_id as string
                         const timing = speechTiming.get(itemId)
                         const start  = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
                         const end    = timing !== undefined ? Duration.fromMillis(timing.endMs)   : start
@@ -204,7 +206,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                 }
                 case "conversation.item.input_audio_transcription.completed": {
                     if (!this.closing && this.queue !== null) {
-                        text = ev.transcript as string
+                        const text   = ev.transcript as string
                         const itemId = ev.item_id as string
                         const timing = speechTiming.get(itemId)
                         const start  = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
@@ -213,8 +215,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                         chunk.meta = aggregateMeta(start, end)
                         metastore.prune(start)
                         speechTiming.delete(itemId)
+                        textByItem.delete(itemId)
                         this.queue.write(chunk)
-                        text = ""
                     }
                     break
                 }
@@ -248,9 +250,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
             }
         })
-        /*  remember opening time to receive time zero offset  */
-        this.timeOpen = DateTime.now()
         /*  provide Duplex stream and internally attach to OpenAI API  */
         const self = this
         const reads = new util.PromiseSet<void>()
@@ -260,7 +259,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
             decodeStrings:      false,
             highWaterMark:      1,
             write (chunk: SpeechFlowChunk, encoding, callback) {
-                if (self.closing || self.ws === null) {
+                if (self.closing || self.ws === null || self.resampler === null) {
                     callback(new Error("stream already destroyed"))
                     return
                 }
@@ -274,7 +273,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                         if (chunk.meta.size > 0)
                             metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
                         try {
-                            const payload = self.resampler!.processChunk(chunk.payload)
+                            const payload = self.resampler.processChunk(chunk.payload)
                             const audioB64 = payload.toString("base64")
                             sendMessage({
                                 type: "input_audio_buffer.append",
@@ -296,17 +295,23 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                 }
                 try {
                     sendMessage({ type: "input_audio_buffer.commit" })
-                    self.ws.close()
-                    await util.sleep(50)
+                    self.ws?.close()
+                    await new Promise<void>((resolve) => {
+                        const timeout = setTimeout(() => { resolve() }, 5000)
+                        self.ws?.once("close", () => {
+                            clearTimeout(timeout)
+                            resolve()
+                        })
+                    })
                 }
                 catch (error) {
                     self.log("warning", `error closing OpenAI connection: ${error}`)
                 }
+                /*  await all read operations  */
                 await reads.awaitAll()
-                const chunks: Array<SpeechFlowChunk | null> = self.queue?.drain() ?? []
-                for (const chunk of chunks)
-                    this.push(chunk)
-                this.push(null)
+                /*  NOTICE: do not push null here -- let the WebSocket close event handle it  */
                 callback()
             },
             read (size) {
@@ -346,6 +351,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
             this.connectionTimeout = null
         }
+        /*  shutdown stream  */
+        if (this.stream !== null) {
+            await util.destroyStream(this.stream)
+            this.stream = null
+        }
         /*  signal EOF to any pending read operations  */
         if (this.queue !== null) {
             this.queue.write(null)
@@ -362,12 +373,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
             this.openai = null
         /*  close resampler  */
-        this.resampler = null
-        /*  shutdown stream  */
-        if (this.stream !== null) {
-            await util.destroyStream(this.stream)
-            this.stream = null
+        if (this.resampler !== null) {
+            this.resampler.destroy()
+            this.resampler = null
         }
     }
 }

package/speechflow-cli/src/speechflow-node-t2a-amazon.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -131,9 +131,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
                 else if (chunk.payload === "")
                     callback()
                 else {
+                    let callbackCalled = false
                     let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
                         processTimeout = null
-                        callback(new Error("AWS Polly API timeout"))
+                        if (!callbackCalled) {
+                            callbackCalled = true
+                            callback(new Error("AWS Polly API timeout"))
+                        }
                     }, 60 * 1000)
                     const clearProcessTimeout = () => {
                         if (processTimeout !== null) {
@@ -143,8 +147,11 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
                     }
                     self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
                     textToSpeech(chunk.payload as string).then((buffer) => {
+                        clearProcessTimeout()
+                        if (callbackCalled)
+                            return
+                        callbackCalled = true
                         if (self.closing) {
-                            clearProcessTimeout()
                             callback(new Error("stream destroyed during processing"))
                             return
                         }
@@ -157,11 +164,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
                         chunkNew.type         = "audio"
                         chunkNew.payload      = buffer
                         chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
-                        clearProcessTimeout()
                         this.push(chunkNew)
                         callback()
                     }).catch((error: unknown) => {
                         clearProcessTimeout()
+                        if (callbackCalled)
+                            return
+                        callbackCalled = true
                         callback(util.ensureError(error, "AWS Polly processing failed"))
                     })
                 }
@@ -184,8 +193,10 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
         }
         /*  destroy resampler  */
-        if (this.resampler !== null)
+        if (this.resampler !== null) {
+            this.resampler.destroy()
             this.resampler = null
+        }
         /*  destroy AWS Polly API  */
         if (this.client !== null) {