npm - speechflow - Versions diffs - 1.4.5 → 1.5.0 - Mend

speechflow 1.4.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166) hide show

package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts ADDED Viewed

@@ -0,0 +1,164 @@
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+/*  standard dependencies  */
+import Stream               from "node:stream"
+import { Worker }           from "node:worker_threads"
+import { resolve }          from "node:path"
+/*  internal dependencies  */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils                          from "./speechflow-utils"
+/*  SpeechFlow node for RNNoise based noise suppression in audio-to-audio passing  */
+/*  SpeechFlow node for RNNoise based noise suppression in audio-to-audio passing.
+    The denoising itself is delegated to a worker thread: each audio chunk is handed
+    in segments of "sampleSize" samples to the worker and the returned segments
+    replace the payload of the chunk.  */
+export default class SpeechFlowNodeRNNoise extends SpeechFlowNode {
+    /*  declare official node name  */
+    public static name = "rnnoise"
+    /*  internal state  */
+    private destroyed = false
+    private sampleSize = 480 /* = 10ms at 48KHz, as required by RNNoise! */
+    private worker: Worker | null = null
+    /*  requests sent to the worker which still await their "process-done" reply (keyed by request id)  */
+    private pending = new Map<string, {
+        resolve: (arr: Int16Array<ArrayBuffer>) => void,
+        reject:  (err: Error) => void
+    }>()
+    /*  construct node  */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+        /*  declare node configuration parameters  */
+        this.configure({})
+        /*  declare node input/output format  */
+        this.input  = "audio"
+        this.output = "audio"
+    }
+    /*  fail all outstanding worker requests, so that the transform callbacks
+        waiting on them are always invoked instead of hanging forever  */
+    private rejectPending (err: Error) {
+        const entries = Array.from(this.pending.values())
+        this.pending.clear()
+        for (const entry of entries)
+            entry.reject(err)
+    }
+    /*  open node: start the worker thread, wait for its startup handshake
+        and establish the transform stream (rejects if the worker does not
+        report "ready" within 5 seconds, reports "failed", errors or exits)  */
+    async open () {
+        /*  clear destruction flag and drop requests of a previous run  */
+        this.destroyed = false
+        this.rejectPending(new Error("RNNoise node re-opened"))
+        /*  initialize worker  */
+        const worker = new Worker(resolve(__dirname, "speechflow-node-a2a-rnnoise-wt.js"))
+        this.worker = worker
+        worker.on("error", (err) => {
+            this.log("error", `RNNoise worker thread error: ${err}`)
+            /*  ignore a worker which was already replaced or closed  */
+            if (this.worker === worker)
+                this.rejectPending(new Error(`RNNoise worker thread error: ${err}`))
+        })
+        worker.on("exit", (code) => {
+            if (code !== 0)
+                this.log("error", `RNNoise worker thread exited with error code ${code}`)
+            else
+                this.log("info", `RNNoise worker thread exited with regular code ${code}`)
+            /*  an exited worker never answers: forget it and fail outstanding requests
+                (skipped if close() or a later open() already replaced the worker)  */
+            if (this.worker === worker) {
+                this.worker = null
+                this.rejectPending(new Error(`RNNoise worker thread exited with code ${code}`))
+            }
+        })
+        /*  wait for the startup handshake of the worker  */
+        try {
+            await new Promise<void>((resolveReady, rejectReady) => {
+                /*  whichever event settles the handshake removes the other listeners and the timer  */
+                const cleanup = () => {
+                    clearTimeout(timeout)
+                    worker.off("message", onMessage)
+                    worker.off("error",   onError)
+                    worker.off("exit",    onExit)
+                }
+                const onMessage = (msg: any) => {
+                    cleanup()
+                    if (typeof msg === "object" && msg !== null && msg.type === "ready")
+                        resolveReady()
+                    else if (typeof msg === "object" && msg !== null && msg.type === "failed")
+                        rejectReady(new Error(msg.message ?? "RNNoise worker thread initialization failed"))
+                    else
+                        rejectReady(new Error(`RNNoise worker thread sent unexpected message on startup`))
+                }
+                const onError = (err: Error) => {
+                    cleanup()
+                    rejectReady(err)
+                }
+                const onExit = (code: number) => {
+                    cleanup()
+                    rejectReady(new Error(`RNNoise worker thread exited during initialization (code ${code})`))
+                }
+                const timeout = setTimeout(() => {
+                    cleanup()
+                    rejectReady(new Error("RNNoise worker thread initialization timeout"))
+                }, 5000)
+                worker.once("message", onMessage)
+                worker.once("error",   onError)
+                worker.once("exit",    onExit)
+            })
+        }
+        catch (err) {
+            /*  do not leave a half-initialized worker thread behind  */
+            if (this.worker === worker)
+                this.worker = null
+            await worker.terminate().catch(() => { /*  best-effort cleanup only  */ })
+            throw err
+        }
+        /*  receive message from worker  */
+        worker.on("message", (msg: any) => {
+            if (typeof msg === "object" && msg !== null && msg.type === "process-done") {
+                const entry = this.pending.get(msg.id)
+                this.pending.delete(msg.id)
+                if (entry)
+                    entry.resolve(msg.data)
+                else
+                    this.log("warning", `RNNoise worker thread sent back unexpected id: ${msg.id}`)
+            }
+            else
+                this.log("warning", `RNNoise worker thread sent unexpected message: ${JSON.stringify(msg)}`)
+        })
+        /*  send message to worker  */
+        let seq = 0
+        const workerProcessSegment = async (segment: Int16Array<ArrayBuffer>) => {
+            /*  after destruction the segment is passed through unprocessed  */
+            if (this.destroyed)
+                return segment
+            /*  a worker which already exited would never reply  */
+            if (this.worker !== worker)
+                throw new Error("RNNoise worker thread is not running")
+            const id = `${seq++}`
+            return new Promise<Int16Array<ArrayBuffer>>((resolveSegment, rejectSegment) => {
+                this.pending.set(id, { resolve: resolveSegment, reject: rejectSegment })
+                try {
+                    /*  the underlying buffer of the segment is transferred (not copied) to the worker  */
+                    worker.postMessage({ type: "process", id, data: segment }, [ segment.buffer ])
+                }
+                catch (err) {
+                    /*  do not keep a request which was never sent  */
+                    this.pending.delete(id)
+                    rejectSegment(err instanceof Error ? err : new Error(String(err)))
+                }
+            })
+        }
+        /*  establish a transform stream  */
+        const self = this
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings:      false,
+            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    /*  convert Buffer into Int16Array  */
+                    const payload = utils.convertBufToI16(chunk.payload)
+                    /*  process Int16Array in necessary segments  */
+                    utils.processInt16ArrayInSegments(payload, self.sampleSize, (segment) =>
+                        workerProcessSegment(segment)
+                    ).then((payload: Int16Array<ArrayBuffer>) => {
+                        /*  never push into a stream which was destroyed in the meantime  */
+                        if (self.destroyed)
+                            throw new Error("stream already destroyed")
+                        /*  convert Int16Array into Buffer  */
+                        const buf = utils.convertI16ToBuf(payload)
+                        /*  update chunk  */
+                        chunk.payload = buf
+                        /*  forward updated chunk  */
+                        this.push(chunk)
+                        callback()
+                    }).catch((err: Error) => {
+                        self.log("warning", `processing of chunk failed: ${err}`)
+                        callback(err)
+                    })
+                }
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+    /*  close node: destroy the stream, fail outstanding requests and stop the worker thread  */
+    async close () {
+        /*  indicate destruction  */
+        this.destroyed = true
+        /*  close stream (done first, so that the failures of the outstanding
+            requests below hit an already destroyed stream)  */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+        /*  release all transform callbacks still waiting for the worker  */
+        this.rejectPending(new Error("stream already destroyed"))
+        /*  shutdown worker  */
+        const worker = this.worker
+        this.worker = null
+        if (worker !== null) {
+            await worker.terminate().catch((err: unknown) => {
+                this.log("warning", `RNNoise worker thread termination failed: ${err}`)
+            })
+        }
+    }
+}

package/speechflow-cli/src/speechflow-node-a2a-speex.ts ADDED Viewed

@@ -0,0 +1,137 @@
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+/*  standard dependencies  */
+import path                 from "node:path"
+import fs                   from "node:fs"
+import Stream               from "node:stream"
+/*  external dependencies  */
+import { loadSpeexModule, SpeexPreprocessor } from "@sapphi-red/speex-preprocess-wasm"
+/*  internal dependencies  */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils                          from "./speechflow-utils"
+/*  SpeechFlow node for Speex based noise suppression in audio-to-audio passing  */
+/*  SpeechFlow node for Speex based noise suppression in audio-to-audio passing.
+    Each audio chunk is processed in-place by the Speex pre-processor (WebAssembly)
+    in segments of "sampleSize" samples.  */
+export default class SpeechFlowNodeSpeex extends SpeechFlowNode {
+    /*  declare official node name  */
+    public static name = "speex"
+    /*  internal state  */
+    private destroyed = false
+    private sampleSize = 480 /* = 10ms at 48KHz */
+    private speexProcessor: SpeexPreprocessor | null = null
+    /*  construct node  */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+        /*  declare node configuration parameters
+            (attenuate: value in the range -60..0, default -18; it is
+            passed to the "noiseSuppress" setting of the pre-processor)  */
+        this.configure({
+            attenuate: { type: "number", val: -18, pos: 0, match: (n: number) => n >= -60 && n <= 0 },
+        })
+        /*  declare node input/output format  */
+        this.input  = "audio"
+        this.output = "audio"
+    }
+    /*  open node: load the Speex WebAssembly module, configure the pre-processor
+        and establish the transform stream (throws if the configured audio
+        sample rate is not 48000)  */
+    async open () {
+        /*  clear destruction flag  */
+        this.destroyed = false
+        /*  validate sample rate compatibility  */
+        if (this.config.audioSampleRate !== 48000)
+            throw new Error(`Speex node requires 48KHz sample rate, got ${this.config.audioSampleRate}Hz`)
+        /*  initialize and configure Speex pre-processor  */
+        const wasmBinary = await fs.promises.readFile(
+            path.join(__dirname, "../node_modules/@sapphi-red/speex-preprocess-wasm/dist/speex.wasm"))
+        /*  a Node Buffer may be just a view onto a larger underlying ArrayBuffer,
+            so hand over exactly the bytes of the file instead of the whole buffer  */
+        const wasmArrayBuffer = wasmBinary.buffer.slice(
+            wasmBinary.byteOffset, wasmBinary.byteOffset + wasmBinary.byteLength) as ArrayBuffer
+        const speexModule = await loadSpeexModule({
+            wasmBinary: wasmArrayBuffer
+        })
+        const processor = new SpeexPreprocessor(
+            speexModule, this.sampleSize, this.config.audioSampleRate)
+        processor.denoise            = true
+        processor.noiseSuppress      = this.params.attenuate
+        processor.agc                = false
+        processor.vad                = false
+        processor.echoSuppress       = 0
+        processor.echoSuppressActive = 0
+        this.speexProcessor = processor
+        /*  establish a transform stream  */
+        const self = this
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings:      false,
+            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    /*  convert Buffer into Int16Array  */
+                    const payload = utils.convertBufToI16(chunk.payload)
+                    /*  process Int16Array in necessary fixed-size segments
+                        (the callback is "async", so that a failure always
+                        surfaces as a rejection and never as a synchronous throw)  */
+                    utils.processInt16ArrayInSegments(payload, self.sampleSize, async (segment) => {
+                        /*  the processor is released by close(), so a missing
+                            processor is treated like a destroyed stream instead
+                            of silently passing the audio through undenoised  */
+                        if (self.destroyed || self.speexProcessor === null)
+                            throw new Error("stream already destroyed")
+                        self.speexProcessor.processInt16(segment)
+                        return segment
+                    }).then((payload: Int16Array<ArrayBuffer>) => {
+                        if (self.destroyed)
+                            throw new Error("stream already destroyed")
+                        /*  convert Int16Array back into Buffer  */
+                        const buf = utils.convertI16ToBuf(payload)
+                        /*  update chunk  */
+                        chunk.payload = buf
+                        /*  forward updated chunk  */
+                        this.push(chunk)
+                        callback()
+                    }).catch((err: Error) => {
+                        self.log("warning", `processing of chunk failed: ${err}`)
+                        callback(err)
+                    })
+                }
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+    /*  close node: release the Speex pre-processor and destroy the stream  */
+    async close () {
+        /*  indicate destruction  */
+        this.destroyed = true
+        /*  destroy processor  */
+        if (this.speexProcessor !== null) {
+            this.speexProcessor.destroy()
+            this.speexProcessor = null
+        }
+        /*  close stream  */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}

package/speechflow-cli/src/speechflow-node-a2a-vad.ts CHANGED Viewed

@@ -205,7 +205,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                         if ((chunks * chunkSize) < data.length) {
                             const frame = new Float32Array(chunkSize)
                             frame.fill(0)
-                            frame.set(data.slice(chunks * chunkSize, data.length))
+                            frame.set(data.slice(chunks * chunkSize))
                             const segment: AudioQueueElementSegment = { data: frame }
                             segmentData.push(segment)
                         }
@@ -315,7 +315,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                             && element.type === "audio-frame"
                             && element.isSpeech !== undefined)
                             flushPendingChunks()
-                        else if (!self.destroyed) {
+                        else if (!self.destroyed && !self.activeEventListeners.has(awaitForthcomingChunks)) {
                             self.queue.once("write", awaitForthcomingChunks)
                             self.activeEventListeners.add(awaitForthcomingChunks)
                         }
@@ -328,7 +328,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                         && element.type === "audio-frame"
                         && element.isSpeech !== undefined)
                         flushPendingChunks()
-                    else if (!self.destroyed) {
+                    else if (!self.destroyed && !self.activeEventListeners.has(awaitForthcomingChunks)) {
                         self.queue.once("write", awaitForthcomingChunks)
                         self.activeEventListeners.add(awaitForthcomingChunks)
                     }

package/speechflow-cli/src/speechflow-node-a2a-wav.ts CHANGED Viewed

@@ -21,22 +21,19 @@ const writeWavHeader = (
     const bitDepth     = options?.bitDepth    ?? 16    /* 16-Bit */
     const headerLength = 44
-    const dataLength   = length || (4294967295 - 100)
+    const maxDataSize  = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
+    const dataLength   = length ?? maxDataSize
     const fileSize     = dataLength + headerLength
     const header       = Buffer.alloc(headerLength)
-    const RIFF         = Buffer.alloc(4, "RIFF")
-    const WAVE         = Buffer.alloc(4, "WAVE")
-    const fmt          = Buffer.alloc(4, "fmt ")
-    const data         = Buffer.alloc(4, "data")
     const byteRate     = (sampleRate * channels * bitDepth) / 8
     const blockAlign   = (channels * bitDepth) / 8
     let offset = 0
-    RIFF.copy(header, offset);                  offset += RIFF.length
+    header.write("RIFF", offset);               offset += 4
     header.writeUInt32LE(fileSize - 8, offset); offset += 4
-    WAVE.copy(header, offset);                  offset += WAVE.length
-    fmt.copy(header, offset);                   offset += fmt.length
+    header.write("WAVE", offset);               offset += 4
+    header.write("fmt ", offset);               offset += 4
     header.writeUInt32LE(16, offset);           offset += 4
     header.writeUInt16LE(audioFormat, offset);  offset += 2
     header.writeUInt16LE(channels, offset);     offset += 2
@@ -44,7 +41,7 @@ const writeWavHeader = (
     header.writeUInt32LE(byteRate, offset);     offset += 4
     header.writeUInt16LE(blockAlign, offset);   offset += 2
     header.writeUInt16LE(bitDepth, offset);     offset += 2
-    data.copy(header, offset);                  offset += data.length
+    header.write("data", offset);               offset += 4
     header.writeUInt32LE(dataLength, offset);   offset += 4
     return header
@@ -52,6 +49,9 @@ const writeWavHeader = (
 /*  read WAV header  */
 const readWavHeader = (buffer: Buffer) => {
+    if (buffer.length < 44)
+        throw new Error("WAV header too short, expected at least 44 bytes")
     let offset = 0
     const riffHead     = buffer.subarray(offset, offset + 4).toString(); offset += 4
     const fileSize     = buffer.readUInt32LE(offset);                    offset += 4
@@ -67,6 +67,15 @@ const readWavHeader = (buffer: Buffer) => {
     const data         = buffer.subarray(offset, offset + 4).toString(); offset += 4
     const dataLength   = buffer.readUInt32LE(offset);                    offset += 4
+    if (riffHead !== "RIFF")
+        throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
+    if (waveHead !== "WAVE")
+        throw new Error(`Invalid WAV file: expected WAVE header, got "${waveHead}"`)
+    if (fmtHead !== "fmt ")
+        throw new Error(`Invalid WAV file: expected "fmt " header, got "${fmtHead}"`)
+    if (data !== "data")
+        throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
     return {
         riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
         channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
@@ -103,10 +112,8 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
             decodeStrings:      false,
             highWaterMark:      1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
-                if (!Buffer.isBuffer(chunk.payload)) {
+                if (!Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
-                    return
-                }
                 else if (firstChunk) {
                     if (self.params.mode === "encode") {
                         /*  convert raw/PCM to WAV/PCM
@@ -164,13 +171,13 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
                         callback(new Error(`invalid operation mode "${self.params.mode}"`))
                         return
                     }
+                    firstChunk = false
                 }
                 else {
                     /*  pass-through original chunk  */
                     this.push(chunk)
                     callback()
                 }
-                firstChunk = false
             },
             final (callback) {
                 this.push(null)