npm - speechflow - Versions diffs - 2.2.1 → 2.3.0 - Mend

speechflow 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (235)

package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -46,7 +46,7 @@ parentPort!.on("message", (msg) => {
         /*  convert back Float32Array to Int16Array  */
         const i16 = new Int16Array(data.length)
         for (let i = 0; i < data.length; i++)
-            i16[i] = Math.round(f32a[i])
+            i16[i] = Math.max(-32768, Math.min(32767, Math.round(f32a[i])))
         /*  send processed frame back to parent  */
         parentPort!.postMessage({ type: "process-done", id, data: i16 }, [ i16.buffer ])

package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -71,14 +71,27 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
             })
         })
+        /*  track pending promises  */
+        const pending = new Map<string, {
+            resolve: (arr: Int16Array<ArrayBuffer>) => void,
+            reject:  (err: Error)                   => void
+        }>()
+        /*  reject all pending promises on worker exit  */
+        this.worker.on("exit", () => {
+            const err = new Error("worker terminated")
+            for (const cb of pending.values())
+                cb.reject(err)
+            pending.clear()
+        })
         /*  receive message from worker  */
-        const pending = new Map<string, (arr: Int16Array<ArrayBuffer>) => void>()
         this.worker.on("message", (msg: any) => {
             if (typeof msg === "object" && msg !== null && msg.type === "process-done") {
                 const cb = pending.get(msg.id)
                 pending.delete(msg.id)
                 if (cb)
-                    cb(msg.data)
+                    cb.resolve(msg.data)
                 else
                     this.log("warning", `RNNoise worker thread sent back unexpected id: ${msg.id}`)
             }
@@ -92,8 +105,8 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
             if (this.closing)
                 return segment
             const id = `${seq++}`
-            return new Promise<Int16Array<ArrayBuffer>>((resolve) => {
-                pending.set(id, (segment) => { resolve(segment) })
+            return new Promise<Int16Array<ArrayBuffer>>((resolve, reject) => {
+                pending.set(id, { resolve, reject })
                 this.worker!.postMessage({ type: "process", id, data: segment }, [ segment.buffer ])
             })
         }
@@ -113,20 +126,19 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
                     callback(new Error("invalid chunk payload type"))
                 else {
                     /*  convert Buffer into Int16Array  */
-                    const payload = util.convertBufToI16(chunk.payload)
+                    const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
                     /*  process Int16Array in necessary segments  */
                     util.processInt16ArrayInSegments(payload, self.sampleSize, (segment) =>
                         workerProcessSegment(segment)
                     ).then((payload: Int16Array<ArrayBuffer>) => {
                         /*  convert Int16Array into Buffer  */
-                        const buf = util.convertI16ToBuf(payload)
-                        /*  update chunk  */
-                        chunk.payload = buf
+                        const buf = util.convertI16ToBuf(payload, self.config.audioLittleEndian)
-                        /*  forward updated chunk  */
-                        this.push(chunk)
+                        /*  forward cloned chunk with updated payload  */
+                        const chunkNew = chunk.clone()
+                        chunkNew.payload = buf
+                        this.push(chunkNew)
                         callback()
                     }).catch((err: unknown) => {
                         const error = util.ensureError(err)

package/speechflow-cli/src/speechflow-node-a2a-speex.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -53,7 +53,9 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
         const wasmBinary = await fs.promises.readFile(
             path.join(__dirname, "../node_modules/@sapphi-red/speex-preprocess-wasm/dist/speex.wasm"))
         const speexModule = await loadSpeexModule({
-            wasmBinary: wasmBinary.buffer
+            wasmBinary: wasmBinary.buffer.slice(
+                wasmBinary.byteOffset,
+                wasmBinary.byteOffset + wasmBinary.byteLength)
         })
         this.speexProcessor = new SpeexPreprocessor(
             speexModule, this.sampleSize, this.config.audioSampleRate)
@@ -79,7 +81,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
                     callback(new Error("invalid chunk payload type"))
                 else {
                     /*  convert Buffer into Int16Array  */
-                    const payload = util.convertBufToI16(chunk.payload)
+                    const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
                     /*  process Int16Array in necessary fixed-size segments  */
                     util.processInt16ArrayInSegments(payload, self.sampleSize, (segment) => {
@@ -94,13 +96,12 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
                             throw new Error("stream already destroyed")
                         /*  convert Int16Array back into Buffer  */
-                        const buf = util.convertI16ToBuf(payload)
+                        const buf = util.convertI16ToBuf(payload, self.config.audioLittleEndian)
-                        /*  update chunk  */
-                        chunk.payload = buf
-                        /*  forward updated chunk  */
-                        this.push(chunk)
+                        /*  forward cloned chunk with updated payload  */
+                        const chunkNew = chunk.clone()
+                        chunkNew.payload = buf
+                        this.push(chunkNew)
                         callback()
                     }).catch((err: unknown) => {
                         const error = util.ensureError(err)

package/speechflow-cli/src/speechflow-node-a2a-vad.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -258,6 +258,9 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         return
                     }
+                    /*  await forthcoming audio chunks (forward declaration)  */
+                    let awaitForthcomingChunks: () => void = () => {}
                     /*  flush pending audio chunks  */
                     const flushPendingChunks = () => {
                         let pushed = 0
@@ -289,22 +292,22 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                                 this.push(chunk)
                                 pushed++
                             }
-                            else if (self.params.mode === "unplugged" && pushed === 0) {
-                                /*  we have to await chunks now, as in unplugged
-                                    mode we else would be never called again until
-                                    we at least once push a new chunk as the result  */
-                                setTimeout(() => {
-                                    if (self.closing || self.queue === null)
-                                        return
-                                    tryToRead()
-                                }, 0)
-                                return
-                            }
+                        }
+                        /*  in unplugged mode, if no chunk was pushed (all were
+                            non-speech), we need to wait event-driven for new
+                            data, as the stream won't call read() again until
+                            we push something  */
+                        if (pushed === 0
+                            && !self.closing
+                            && !self.activeEventListeners.has(awaitForthcomingChunks)) {
+                            self.queue.once("write", awaitForthcomingChunks)
+                            self.activeEventListeners.add(awaitForthcomingChunks)
                         }
                     }
                     /*  await forthcoming audio chunks  */
-                    const awaitForthcomingChunks = () => {
+                    awaitForthcomingChunks = () => {
                         self.activeEventListeners.delete(awaitForthcomingChunks)
                         if (self.closing)
                             return
@@ -339,16 +342,28 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
     /*  close node  */
     async close () {
-        /*  indicate closing  */
-        this.closing = true
         /*  cleanup tail timer  */
         if (this.tailTimer !== null) {
             clearTimeout(this.tailTimer)
             this.tailTimer = null
         }
-        /*  remove all event listeners  */
+        /*  flush VAD (before closing, as flush triggers callbacks which need active state)  */
+        if (this.vad !== null) {
+            try {
+                const flushPromise = this.vad.flush()
+                const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
+                await Promise.race([ flushPromise, timeoutPromise ])
+            }
+            catch (error) {
+                this.log("warning", `VAD flush error during close: ${error}`)
+            }
+        }
+        /*  indicate closing  */
+        this.closing = true
+        /*  remove all remaining event listeners  */
         this.activeEventListeners.forEach((listener) => {
             this.queue.removeListener("write", listener)
         })
@@ -360,23 +375,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
             this.stream = null
         }
-        /*  cleanup queue pointers before closing VAD to prevent callback access  */
-        this.queue.pointerDelete("recv")
-        this.queue.pointerDelete("vad")
-        this.queue.pointerDelete("send")
-        /*  close VAD  */
+        /*  destroy VAD  */
         if (this.vad !== null) {
-            try {
-                const flushPromise = this.vad.flush()
-                const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
-                await Promise.race([ flushPromise, timeoutPromise ])
-            }
-            catch (error) {
-                this.log("warning", `VAD flush error during close: ${error}`)
-            }
             this.vad.destroy()
             this.vad = null
         }
+        /*  cleanup queue pointers  */
+        this.queue.pointerDelete("recv")
+        this.queue.pointerDelete("vad")
+        this.queue.pointerDelete("send")
     }
 }

package/speechflow-cli/src/speechflow-node-a2a-wav.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -183,9 +183,10 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
                             callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
                             return
                         }
-                        chunk.payload = chunk.payload.subarray(44)
-                        this.push(chunk)
-                        totalSize += chunk.payload.byteLength
+                        const chunkNew = chunk.clone()
+                        chunkNew.payload = chunk.payload.subarray(44)
+                        this.push(chunkNew)
+                        totalSize += chunkNew.payload.byteLength
                         callback()
                     }
                     else {
@@ -210,7 +211,7 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
                         sampleRate:  self.config.audioSampleRate,
                         bitDepth:    self.config.audioBitDepth
                     })
-                    const headerChunk = headerChunkSent?.clone()
+                    const headerChunk = headerChunkSent.clone()
                     headerChunk.payload = headerBuffer
                     headerChunk.meta.set("chunk:seek", 0)
                     this.push(headerChunk)

package/speechflow-cli/src/speechflow-node-a2t-amazon.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -42,6 +42,7 @@ class AsyncQueue<T> {
             resolve?.({ value: null, done: true })
         }
         this.queue.length = 0
+        this.queue.push(null)
     }
     async * [Symbol.asyncIterator] (): AsyncIterator<T> {
         while (true) {
@@ -71,8 +72,9 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
     private client:       TranscribeStreamingClient                | null = null
     private clientStream: AsyncIterable<TranscriptResultStream>    | null = null
     private audioQueue:   AsyncQueue<Uint8Array>                   | null = null
+    private queue:        util.AsyncQueue<SpeechFlowChunk | null>  | null = null
+    private clientStreamStarting                                          = false
     private closing                                                       = false
-    private queue:        util.SingleQueue<SpeechFlowChunk | null> | null = null
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -110,10 +112,11 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
             throw new Error("Amazon Transcribe node currently supports PCM-S16LE audio only")
         /*  clear destruction flag  */
-        this.closing = false
+        this.closing             = false
+        this.clientStreamStarting = false
         /*  create queue for results  */
-        this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+        this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
         /*  create a store for the meta information  */
         const metastore = new util.TimeStore<Map<string, any>>()
@@ -138,24 +141,31 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
         /*  start streaming  */
         const ensureAudioStreamActive = async () => {
-            if (this.clientStream !== null || this.closing)
+            if (this.clientStream !== null || this.clientStreamStarting || this.closing)
                 return
-            const language: LanguageCode = this.params.language === "de" ? "de-DE" : "en-US"
-            const command = new StartStreamTranscriptionCommand({
-                LanguageCode: language,
-                EnablePartialResultsStabilization: this.params.interim,
-                ...(this.params.interim ? { PartialResultsStability: "low" } : {}),
-                MediaEncoding: "pcm",
-                MediaSampleRateHertz: this.config.audioSampleRate,
-                AudioStream: audioStream,
-            })
-            const response = await this.client!.send(command)
-            const stream = response.TranscriptResultStream
-            if (!stream)
-                throw new Error("no TranscriptResultStream returned")
-            this.clientStream = stream
+            this.clientStreamStarting = true
+            try {
+                const language: LanguageCode = this.params.language === "de" ? "de-DE" : "en-US"
+                const command = new StartStreamTranscriptionCommand({
+                    LanguageCode: language,
+                    EnablePartialResultsStabilization: this.params.interim,
+                    ...(this.params.interim ? { PartialResultsStability: "low" } : {}),
+                    MediaEncoding: "pcm",
+                    MediaSampleRateHertz: this.config.audioSampleRate,
+                    AudioStream: audioStream,
+                })
+                const response = await this.client!.send(command)
+                const stream = response.TranscriptResultStream
+                if (!stream)
+                    throw new Error("no TranscriptResultStream returned")
+                this.clientStream = stream
+            }
+            catch (err) {
+                this.clientStreamStarting = false
+                throw err
+            }
             ;(async () => {
-                for await (const event of stream) {
+                for await (const event of this.clientStream!) {
                     const te = event.TranscriptEvent
                     if (!te?.Transcript?.Results)
                         continue
@@ -194,6 +204,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                 }
             })().catch((err: unknown) => {
                 this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${util.ensureError(err).message}`)
+                this.clientStream         = null
+                this.clientStreamStarting = false
             })
         }
@@ -259,7 +271,7 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                         this.push(null)
                     }
                     else {
-                        self.log("debug", `received data (${chunk.payload.length} bytes): "${chunk.payload}"`)
+                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
                         this.push(chunk)
                     }
                 }).catch((error: unknown) => {
@@ -273,7 +285,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
     /*  close node  */
     async close () {
         /*  indicate closing first to stop all async operations  */
-        this.closing = true
+        this.closing              = true
+        this.clientStreamStarting = false
         /*  shutdown stream  */
         if (this.stream !== null) {

package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -24,7 +24,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
     private dg:                Deepgram.LiveClient                      | null = null
     private closing                                                            = false
     private connectionTimeout: ReturnType<typeof setTimeout>            | null = null
-    private queue:             util.SingleQueue<SpeechFlowChunk | null> | null = null
+    private queue:             util.AsyncQueue<SpeechFlowChunk | null>  | null = null
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -64,7 +64,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                         balance += balanceResponse.result.balances[0]?.amount ?? 0
                 }
             }
-            else if (response?.error !== null)
+            else if (response !== null && response.error !== null)
                 this.log("warning", `API error fetching projects: ${response.error}`)
         }
         catch (error) {
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
         this.closing = false
         /*  create queue for results  */
-        this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+        this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
         /*  create a store for the meta information  */
         const metastore = new util.TimeStore<Map<string, any>>()
@@ -145,7 +145,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                 { word: string, punctuated_word?: string, start: number, end: number }[]
             const isFinal     = (data.is_final     as boolean) ?? false
             const speechFinal = (data.speech_final as boolean) ?? false
-            const kind = ((interim && isFinal) || (endpointing > 0 && speechFinal)) ? "final" : "intermediate"
+            const kind = (isFinal || (endpointing > 0 && speechFinal)) ? "final" : "intermediate"
             if (text === "")
                 this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
             else {
@@ -206,6 +206,13 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                 }
                 resolve(true)
             })
+            this.dg!.once(Deepgram.LiveTranscriptionEvents.Error, (err: Error) => {
+                if (this.connectionTimeout !== null) {
+                    clearTimeout(this.connectionTimeout)
+                    this.connectionTimeout = null
+                }
+                reject(err)
+            })
         })
         /*  remember opening time to receive time zero offset  */
@@ -234,7 +241,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                         if (chunk.meta.size > 0)
                             metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
                         try {
-                            self.dg.send(chunk.payload.buffer) /* intentionally discard all time information */
+                            /*  send buffer (and intentionally discard all time information)  */
+                            self.dg.send(chunk.payload.buffer.slice(
+                                chunk.payload.byteOffset,
+                                chunk.payload.byteOffset + chunk.payload.byteLength
+                            ))
                         }
                         catch (error) {
                             callback(util.ensureError(error, "failed to send to Deepgram"))

package/speechflow-cli/src/speechflow-node-a2t-google.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /*
 **  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
 **  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
@@ -24,7 +24,7 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
     /*  internal state  */
     private client:          GoogleSpeech.SpeechClient                                   | null = null
     private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
-    private queue:           util.SingleQueue<SpeechFlowChunk | null>                    | null = null
+    private queue:           util.AsyncQueue<SpeechFlowChunk | null>                     | null = null
     private closing                                                                             = false
     /*  construct node  */
@@ -63,7 +63,7 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
         this.closing = false
         /*  create queue for results  */
-        this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+        this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
         /*  create a store for the meta information  */
         const metastore = new util.TimeStore<Map<string, any>>()
@@ -152,7 +152,8 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
                     /*  fallback: use result timing  */
                     const resultEnd = result.resultEndTime
                     if (resultEnd) {
-                        tsEnd = Duration.fromMillis(
+                        tsStart = Duration.fromMillis(0).plus(this.timeZeroOffset)
+                        tsEnd   = Duration.fromMillis(
                             (Number(resultEnd.seconds ?? 0) * 1000) +
                             (Number(resultEnd.nanos ?? 0) / 1000000)
                         ).plus(this.timeZeroOffset)