speechflow 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +220 -7
- package/etc/claude.md +70 -0
- package/etc/speechflow.yaml +5 -3
- package/etc/stx.conf +7 -0
- package/package.json +7 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +12 -8
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +15 -13
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
- package/speechflow-cli/dst/speechflow-node.js +13 -2
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-utils.js +123 -35
- package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js +69 -14
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +112 -11
- package/speechflow-cli/etc/stx.conf +2 -2
- package/speechflow-cli/etc/tsconfig.json +1 -1
- package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
- package/speechflow-cli/package.json +102 -94
- package/speechflow-cli/src/lib.d.ts +24 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +13 -9
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +15 -13
- package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
- package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
- package/speechflow-cli/src/speechflow-node.ts +21 -8
- package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
- package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
- package/speechflow-cli/src/speechflow-utils.ts +125 -32
- package/speechflow-cli/src/speechflow.ts +74 -17
- package/speechflow-ui-db/dst/index.js +31 -31
- package/speechflow-ui-db/etc/eslint.mjs +0 -1
- package/speechflow-ui-db/etc/tsc-client.json +3 -3
- package/speechflow-ui-db/package.json +11 -10
- package/speechflow-ui-db/src/app.vue +20 -6
- package/speechflow-ui-st/dst/index.js +26 -26
- package/speechflow-ui-st/etc/eslint.mjs +0 -1
- package/speechflow-ui-st/etc/tsc-client.json +3 -3
- package/speechflow-ui-st/package.json +11 -10
- package/speechflow-ui-st/src/app.vue +5 -12

package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts

@@ -75,7 +75,7 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
                 "c:a": "pcm_s16le",
                 "ar": this.config.audioSampleRate,
                 "ac": this.config.audioChannels,
-                "f": "s16le"
+                "f": "s16le"
             } : {}),
             ...(this.params.dst === "wav" ? {
                 "f": "wav"
@@ -90,7 +90,12 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
                 "f": "opus"
             } : {})
         })
-        this.ffmpeg.run()
+        try {
+            this.ffmpeg.run()
+        }
+        catch (err) {
+            throw new Error(`failed to start FFmpeg process: ${err}`)
+        }
 
         /* establish a duplex stream and connect it to FFmpeg */
         this.stream = Stream.Duplex.from({
@@ -120,7 +125,12 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
 
         /* shutdown FFmpeg */
         if (this.ffmpeg !== null) {
-            this.ffmpeg.kill()
+            try {
+                this.ffmpeg.kill()
+            }
+            catch {
+                /* ignore kill errors during cleanup */
+            }
             this.ffmpeg = null
         }
     }

package/speechflow-cli/src/speechflow-node-a2a-filler.ts

@@ -0,0 +1,223 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+import { EventEmitter } from "node:events"
+import { Duration } from "luxon"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+class AudioFiller extends EventEmitter {
+    private emittedEndSamples = 0 /* stream position in samples already emitted */
+    private readonly bytesPerSample = 2 /* PCM I16 */
+    private readonly bytesPerFrame: number
+    private readonly sampleTolerance = 0.5 /* tolerance for floating-point sample comparisons */
+
+    constructor (private sampleRate = 48000, private channels = 1) {
+        super()
+        this.bytesPerFrame = this.channels * this.bytesPerSample
+    }
+
+    /* optional helper to allow subscribing with strong typing */
+    public on(event: "chunk", listener: (chunk: SpeechFlowChunk) => void): this
+    public on(event: string, listener: (...args: any[]) => void): this {
+        return super.on(event, listener)
+    }
+
+    /* convert duration to fractional samples */
+    private samplesFromDuration(duration: Duration): number {
+        const seconds = duration.as("seconds")
+        const samples = seconds * this.sampleRate
+        return samples
+    }
+
+    /* convert fractional samples to duration */
+    private durationFromSamples(samples: number): Duration {
+        const seconds = samples / this.sampleRate
+        return Duration.fromObject({ seconds })
+    }
+
+    /* emit a chunk of silence */
+    private emitSilence (fromSamples: number, toSamples: number) {
+        const frames = Math.max(0, Math.floor(toSamples - fromSamples))
+        if (frames <= 0)
+            return
+        const payload = Buffer.alloc(frames * this.bytesPerFrame) /* already zeroed */
+        const timestampStart = this.durationFromSamples(fromSamples)
+        const timestampEnd = this.durationFromSamples(toSamples)
+        const chunk = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
+        this.emit("chunk", chunk)
+    }
+
+    /* add a chunk of audio for processing */
+    public add (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }): void {
+        const startSamp = this.samplesFromDuration(chunk.timestampStart)
+        const endSamp = this.samplesFromDuration(chunk.timestampEnd)
+        if (endSamp < startSamp)
+            throw new Error("invalid timestamps")
+
+        /* if chunk starts beyond what we've emitted, insert silence for the gap */
+        if (startSamp > this.emittedEndSamples + this.sampleTolerance) {
+            this.emitSilence(this.emittedEndSamples, startSamp)
+            this.emittedEndSamples = startSamp
+        }
+
+        /* if chunk ends before or at emitted end, we have it fully covered, so drop it */
+        if (endSamp <= this.emittedEndSamples + this.sampleTolerance)
+            return
+
+        /* trim any overlap at the head */
+        const trimHead = Math.max(0, Math.floor(this.emittedEndSamples - startSamp))
+        const availableFrames = Math.floor((endSamp - startSamp) - trimHead)
+        if (availableFrames <= 0)
+            return
+
+        /* determine how many frames the buffer actually has; trust timestamps primarily */
+        const bufFrames = Math.floor(chunk.payload.length / this.bytesPerFrame)
+        const startFrame = Math.min(trimHead, bufFrames)
+        const endFrame = Math.min(startFrame + availableFrames, bufFrames)
+        if (endFrame <= startFrame)
+            return
+
+        /* determine trimmed/normalized chunk */
+        const payload = chunk.payload.subarray(
+            startFrame * this.bytesPerFrame,
+            endFrame * this.bytesPerFrame)
+
+        /* emit trimmed/normalized chunk */
+        const outStartSamples = startSamp + startFrame
+        const outEndSamples = outStartSamples + Math.floor(payload.length / this.bytesPerFrame)
+        const timestampStart = this.durationFromSamples(outStartSamples)
+        const timestampEnd = this.durationFromSamples(outEndSamples)
+        const c = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
+        this.emit("chunk", c)
+
+        /* advance emitted cursor */
+        this.emittedEndSamples = Math.max(this.emittedEndSamples, outEndSamples)
+    }
+}
+
+/* SpeechFlow node for filling audio gaps */
+export default class SpeechFlowNodeFiller extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "filler"
+
+    /* internal state */
+    private destroyed = false
+    private filler: AudioFiller | null = null
+    private sendQueue: utils.AsyncQueue<SpeechFlowChunk | null> | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            segment: { type: "number", val: 50, pos: 0, match: (n: number) => n >= 10 && n <= 1000 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* establish queues */
+        this.filler = new AudioFiller(this.config.audioSampleRate, this.config.audioChannels)
+        this.sendQueue = new utils.AsyncQueue<SpeechFlowChunk | null>()
+
+        /* shift chunks from filler to send queue */
+        this.filler.on("chunk", (chunk) => {
+            this.sendQueue?.write(chunk)
+        })
+
+        /* establish a duplex stream */
+        const self = this
+        this.stream = new Stream.Duplex({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            write (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }, encoding, callback) {
+                if (self.destroyed || self.filler === null)
+                    callback(new Error("stream already destroyed"))
+                else if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    try {
+                        self.filler.add(chunk)
+                        callback()
+                    }
+                    catch (error: any) {
+                        callback(error)
+                    }
+                }
+            },
+            read (size) {
+                if (self.destroyed || self.sendQueue === null) {
+                    this.push(null)
+                    return
+                }
+                self.sendQueue.read().then((chunk) => {
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+                    if (chunk === null) {
+                        self.log("info", "received EOF signal")
+                        this.push(null)
+                    }
+                    else {
+                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
+                        this.push(chunk)
+                    }
+                }).catch((error) => {
+                    if (!self.destroyed)
+                        self.log("error", `queue read error: ${error.message}`)
+                })
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* destroy queues */
+        if (this.sendQueue !== null) {
+            this.sendQueue.destroy()
+            this.sendQueue = null
+        }
+
+        /* destroy filler */
+        if (this.filler !== null) {
+            this.filler.removeAllListeners()
+            this.filler = null
+        }
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
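Note: the new filler node closes timestamp gaps with zeroed PCM and trims overlapping chunk heads against an emitted-samples cursor. Below is a minimal standalone sketch of that cursor arithmetic, using plain sample offsets instead of SpeechFlowChunk/Duration; the GapFiller name and Chunk shape are illustrative only, not part of the package API.

    /* minimal sketch of the filler's gap/overlap arithmetic
       (illustrative names; plain sample offsets, mono 16-bit PCM) */
    type Chunk = { start: number, end: number, payload: Buffer }

    class GapFiller {
        private cursor = 0                       /* samples already emitted */
        constructor (private bytesPerFrame = 2) {}
        add (c: Chunk, emit: (c: Chunk) => void) {
            if (c.start > this.cursor) {         /* gap ahead: emit zeroed PCM */
                const frames = Math.floor(c.start - this.cursor)
                emit({ start: this.cursor, end: c.start,
                       payload: Buffer.alloc(frames * this.bytesPerFrame) })
                this.cursor = c.start
            }
            if (c.end <= this.cursor)            /* fully overlapped: drop */
                return
            const trim = Math.floor(this.cursor - c.start)  /* head overlap */
            emit({ start: c.start + trim, end: c.end,
                   payload: c.payload.subarray(trim * this.bytesPerFrame) })
            this.cursor = c.end
        }
    }

    /* usage: a 100-sample hole between two chunks yields one silence chunk */
    const f = new GapFiller()
    f.add({ start: 0,   end: 100, payload: Buffer.alloc(200) }, console.log)
    f.add({ start: 200, end: 300, payload: Buffer.alloc(200) }, console.log)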

package/speechflow-cli/src/speechflow-node-a2a-gain.ts

@@ -0,0 +1,98 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* SpeechFlow node for gain adjustment in audio-to-audio passing */
+export default class SpeechFlowNodeGain extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "gain"
+
+    /* internal state */
+    private destroyed = false
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            db: { type: "number", val: 0, pos: 0, match: (n: number) => n >= -60 && n <= 60 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* adjust gain */
+        const adjustGain = (chunk: SpeechFlowChunk & { payload: Buffer }, db: number) => {
+            const dv = new DataView(chunk.payload.buffer, chunk.payload.byteOffset, chunk.payload.byteLength)
+            const gainFactor = utils.dB2lin(db)
+            for (let i = 0; i < dv.byteLength; i += 2) {
+                let sample = dv.getInt16(i, true)
+                sample *= gainFactor
+                sample = Math.max(Math.min(sample, 32767), -32768)
+                dv.setInt16(i, sample, true)
+            }
+        }
+
+        /* establish a transform stream */
+        const self = this
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload.byteLength % 2 !== 0)
+                    callback(new Error("invalid audio buffer size (not 16-bit aligned)"))
+                else {
+                    /* adjust chunk */
+                    adjustGain(chunk, self.params.db)
+                    this.push(chunk)
+                    callback()
+                }
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
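Note: the new gain node scales 16-bit little-endian PCM samples by a linear factor derived from the configured dB value and clamps to the Int16 range. A short sketch follows, assuming utils.dB2lin is the usual amplitude conversion 10^(db/20) (the dB2lin and applyGain names below are illustrative stand-ins, not the package's exports):

    /* sketch: standard dB-to-linear amplitude conversion (assumed
       to match what utils.dB2lin in speechflow-utils computes) */
    const dB2lin = (db: number): number => Math.pow(10, db / 20)

    /* apply gain to 16-bit little-endian PCM, clamping to Int16 range */
    function applyGain (pcm: Buffer, db: number): void {
        const gain = dB2lin(db)
        for (let i = 0; i < pcm.length; i += 2) {
            const sample = pcm.readInt16LE(i) * gain
            pcm.writeInt16LE(Math.max(-32768, Math.min(32767, Math.round(sample))), i)
        }
    }

    /* usage: +6 dB roughly doubles amplitude, 1000 becomes about 1995 */
    const buf = Buffer.alloc(2)
    buf.writeInt16LE(1000, 0)
    applyGain(buf, 6)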

package/speechflow-cli/src/speechflow-node-a2a-gender.ts

@@ -21,7 +21,7 @@ type AudioQueueElement = {
     type: "audio-frame",
     chunk: SpeechFlowChunk,
     data: Float32Array,
-    gender?: "male" | "female"
+    gender?: "male" | "female" | "unknown"
 } | {
     type: "audio-eof"
 }
@@ -32,7 +32,6 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
     public static name = "gender"
 
     /* internal state */
-    private static speexInitialized = false
     private classifier: Transformers.AudioClassificationPipeline | null = null
     private queue = new utils.Queue<AudioQueueElement>()
     private queueRecv = this.queue.pointerUse("recv")
@@ -66,7 +65,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
         this.shutdown = false
 
         /* pass-through logging */
-        const log =
+        const log = this.log.bind(this)
 
         /* the used model */
         const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
@@ -81,7 +80,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
                 artifact += `:${progress.file}`
             let percent = 0
             if (typeof progress.loaded === "number" && typeof progress.total === "number")
-                percent = (progress.loaded
+                percent = (progress.loaded / progress.total) * 100
             else if (typeof progress.progress === "number")
                 percent = progress.progress
             if (percent > 0)
@@ -92,7 +91,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
                 return
             for (const [ artifact, percent ] of progressState) {
                 this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
-                if (percent >=
+                if (percent >= 100.0)
                     progressState.delete(artifact)
             }
         }, 1000)
@@ -103,11 +102,17 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
                 device: "auto",
                 progress_callback: progressCallback
             })
-
-
+            let timeoutId: ReturnType<typeof setTimeout> | null = null
+            const timeoutPromise = new Promise((resolve, reject) => {
+                timeoutId = setTimeout(() =>
+                    reject(new Error("model initialization timeout")), 30 * 1000)
+            })
             this.classifier = await Promise.race([
                 pipelinePromise, timeoutPromise
-            ])
+            ]).finally(() => {
+                if (timeoutId !== null)
+                    clearTimeout(timeoutId)
+            }) as Transformers.AudioClassificationPipeline
         }
         catch (error) {
             if (this.progressInterval) {
@@ -128,10 +133,15 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
             if (this.shutdown || this.classifier === null)
                 throw new Error("classifier shutdown during operation")
             const classifyPromise = this.classifier(data)
-
-
-
-
+            let timeoutId: ReturnType<typeof setTimeout> | null = null
+            const timeoutPromise = new Promise((resolve, reject) => {
+                timeoutId = setTimeout(() =>
+                    reject(new Error("classification timeout")), 30 * 1000)
+            })
+            const result = await Promise.race([ classifyPromise, timeoutPromise ]).finally(() => {
+                if (timeoutId !== null)
+                    clearTimeout(timeoutId)
+            }) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
             const classified = Array.isArray(result) ?
                 result as Transformers.AudioClassificationOutput :
                 [ result ]
@@ -139,15 +149,20 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
             const c2 = classified.find((c: any) => c.label === "female")
             const male = c1 ? c1.score : 0.0
             const female = c2 ? c2.score : 0.0
-
+            if (male > female)
+                return "male"
+            else if (male < female)
+                return "female"
+            else
+                return "unknown"
         }
 
         /* define sample rate required by model */
         const sampleRateTarget = 16000
 
         /* work off queued audio frames */
-        const frameWindowDuration =
-        const frameWindowSamples = frameWindowDuration * sampleRateTarget
+        const frameWindowDuration = this.params.window / 1000
+        const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
         let lastGender = ""
         let workingOff = false
         const workOffQueue = async () => {
@@ -236,8 +251,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
                 const wav = new WaveFile()
                 wav.fromScratch(self.config.audioChannels, self.config.audioSampleRate, "32f", data)
                 wav.toSampleRate(sampleRateTarget, { method: "cubic" })
-                data = wav.getSamples(false, Float32Array<ArrayBuffer>
-                any as Float32Array<ArrayBuffer>
+                data = wav.getSamples(false, Float32Array) as any as Float32Array<ArrayBuffer>
 
                 /* queue chunk and converted data */
                 self.queueRecv.append({ type: "audio-frame", chunk, data })
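Note: both timeout blocks added to the gender node follow the same race-with-cleanup pattern: race the work against a rejecting timer and always clear the timer afterwards so it cannot keep the event loop alive. A generic sketch of the pattern follows; the withTimeout helper name is illustrative, the package inlines this logic rather than exporting it:

    /* race a promise against a timeout, always clearing the timer */
    function withTimeout<T> (work: Promise<T>, ms: number, what: string): Promise<T> {
        let timer: ReturnType<typeof setTimeout> | null = null
        const timeout = new Promise<never>((_resolve, reject) => {
            timer = setTimeout(() => reject(new Error(`${what} timeout`)), ms)
        })
        return Promise.race([ work, timeout ]).finally(() => {
            if (timer !== null)
                clearTimeout(timer)
        })
    }

    /* usage, mirroring the model initialization above:
       this.classifier = await withTimeout(pipelinePromise, 30 * 1000, "model initialization") */

With a typed helper like this the `as` casts after Promise.race would also become unnecessary, since the result keeps the type of the work promise.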

package/speechflow-cli/src/speechflow-node-a2a-meter.ts

@@ -22,6 +22,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
     /* internal state */
     private emitInterval: ReturnType<typeof setInterval> | null = null
     private calcInterval: ReturnType<typeof setInterval> | null = null
+    private silenceTimer: ReturnType<typeof setTimeout> | null = null
     private chunkBuffer = new Float32Array(0)
     private destroyed = false
 
@@ -63,7 +64,6 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
         this.chunkBuffer = new Float32Array(0)
 
         /* define chunk processing function */
-        let timer: ReturnType<typeof setTimeout> | null = null
         const processChunk = (chunkData: Float32Array) => {
             /* update internal audio sample sliding window */
             const newWindow = new Float32Array(sampleWindowSize)
@@ -86,11 +86,11 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
                 calculateLoudnessRange: false,
                 calculateTruePeak: false
             })
-            lufss = lufs.shortTerm ? lufs.shortTerm[0] :
+            lufss = lufs.shortTerm ? lufs.shortTerm[0] : -60
             rms = getRMS(audioData, { asDB: true })
-            if (timer !== null)
-                clearTimeout(timer)
-            timer = setTimeout(() => {
+            if (this.silenceTimer !== null)
+                clearTimeout(this.silenceTimer)
+            this.silenceTimer = setTimeout(() => {
                 lufss = -60
                 rms = -60
             }, 500)
@@ -117,7 +117,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
             this.sendResponse([ "meter", "LUFS-S", lufss ])
             this.sendResponse([ "meter", "RMS", rms ])
             if (this.params.dashboard !== "")
-                this.
+                this.sendDashboard("audio", this.params.dashboard, "final", lufss)
         }, this.params.interval)
 
         /* provide Duplex stream and internally attach to meter */
@@ -172,9 +172,6 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
 
     /* close node */
    async close () {
-        /* indicate destruction */
-        this.destroyed = true
-
         /* stop intervals */
         if (this.emitInterval !== null) {
             clearInterval(this.emitInterval)
@@ -184,11 +181,18 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
             clearInterval(this.calcInterval)
             this.calcInterval = null
         }
+        if (this.silenceTimer !== null) {
+            clearTimeout(this.silenceTimer)
+            this.silenceTimer = null
+        }
 
         /* close stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null
         }
+
+        /* indicate destruction */
+        this.destroyed = true
     }
 }
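Note: the meter changes promote a function-local timer to the silenceTimer member so close() can cancel it. The pattern itself: every processed chunk re-arms a 500 ms timer, and if no audio arrives before it fires, the readings fall back to the -60 dB floor. A minimal sketch with illustrative names:

    /* silence-decay sketch: readings drop to -60 dB after 500 ms of no audio */
    let lufss = -60
    let silenceTimer: ReturnType<typeof setTimeout> | null = null

    function onMeasurement (value: number) {
        lufss = value
        if (silenceTimer !== null)
            clearTimeout(silenceTimer)
        silenceTimer = setTimeout(() => { lufss = -60 }, 500)
    }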

package/speechflow-cli/src/speechflow-node-a2a-mute.ts

@@ -43,9 +43,10 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
             throw new Error("mute: node already destroyed")
         try {
             if (params.length === 2 && params[0] === "mode") {
-                if (
+                if (typeof params[1] !== "string" ||
+                    !params[1].match(/^(?:none|silenced|unplugged)$/))
                     throw new Error("mute: invalid mode argument in external request")
-                const muteMode
+                const muteMode = params[1] as MuteMode
                 this.setMuteMode(muteMode)
                 this.sendResponse([ "mute", "mode", muteMode ])
             }

package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts

@@ -0,0 +1,62 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* internal dependencies */
+import { parentPort } from "node:worker_threads"
+
+/* external dependencies */
+import { type DenoiseState, Rnnoise } from "@shiguredo/rnnoise-wasm"
+
+/* WASM state */
+let rnnoise: Rnnoise
+let denoiseState: DenoiseState
+
+/* global initialization */
+;(async () => {
+    try {
+        rnnoise = await Rnnoise.load()
+        denoiseState = rnnoise.createDenoiseState()
+        parentPort!.postMessage({ type: "ready" })
+    }
+    catch (err) {
+        parentPort!.postMessage({ type: "failed", message: `failed to initialize RNNoise: ${err}` })
+        process.exit(1)
+    }
+})()
+
+/* receive messages */
+parentPort!.on("message", (msg) => {
+    if (msg.type === "process") {
+        /* process a single audio frame */
+        const { id, data } = msg
+
+        /* convert regular Int16Array [-32768,32768]
+           to unusual non-normalized Float32Array [-32768,32768]
+           as required by RNNoise */
+        const f32a = new Float32Array(data.length)
+        for (let i = 0; i < data.length; i++)
+            f32a[i] = data[i]
+
+        /* process frame with RNNoise WASM */
+        denoiseState.processFrame(f32a)
+
+        /* convert back Float32Array to Int16Array */
+        const i16 = new Int16Array(data.length)
+        for (let i = 0; i < data.length; i++)
+            i16[i] = Math.round(f32a[i])
+
+        parentPort!.postMessage({ type: "process-done", id, data: i16 }, [ i16.buffer ])
+    }
+    else if (msg.type === "close") {
+        /* shutdown this process */
+        try {
+            denoiseState.destroy()
+        }
+        finally {
+            process.exit(0)
+        }
+    }
+})
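
Note: the worker above accepts "process" messages carrying an id and an Int16Array frame and answers with "process-done" plus the denoised frame, transferring the underlying buffers to avoid copies. A hypothetical parent-side driver is sketched below; the real counterpart lives in speechflow-node-a2a-rnnoise.ts, whose content this view does not show, so the request bookkeeping here is an assumption, and waiting for the initial "ready" message is omitted for brevity:

    /* hypothetical parent-side driver for the worker protocol above */
    import { Worker } from "node:worker_threads"

    const worker = new Worker("./speechflow-node-a2a-rnnoise-wt.js")
    const pending = new Map<number, (data: Int16Array) => void>()
    let nextId = 0

    worker.on("message", (msg: any) => {
        if (msg.type === "process-done") {
            pending.get(msg.id)?.(msg.data)
            pending.delete(msg.id)
        }
    })

    /* send one audio frame for denoising, transferring its buffer
       (RNNoise operates on 480-sample frames at 48 kHz) */
    function denoise (frame: Int16Array): Promise<Int16Array> {
        return new Promise((resolve) => {
            const id = nextId++
            pending.set(id, resolve)
            worker.postMessage({ type: "process", id, data: frame }, [ frame.buffer ])
        })
    }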