speechflow 1.3.1 → 1.3.2
- package/CHANGELOG.md +6 -0
- package/dst/speechflow-node-a2a-gender.d.ts +2 -0
- package/dst/speechflow-node-a2a-gender.js +137 -59
- package/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/dst/speechflow-node-a2a-meter.d.ts +3 -1
- package/dst/speechflow-node-a2a-meter.js +80 -39
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2a-mute.d.ts +1 -0
- package/dst/speechflow-node-a2a-mute.js +37 -11
- package/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/dst/speechflow-node-a2a-vad.d.ts +3 -0
- package/dst/speechflow-node-a2a-vad.js +194 -96
- package/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/dst/speechflow-node-a2a-wav.js +27 -11
- package/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +4 -0
- package/dst/speechflow-node-a2t-deepgram.js +136 -46
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +2 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +61 -12
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +1 -0
- package/dst/speechflow-node-t2a-kokoro.js +10 -4
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/dst/speechflow-node-t2t-deepl.js +8 -4
- package/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/dst/speechflow-node-t2t-format.js +2 -2
- package/dst/speechflow-node-t2t-format.js.map +1 -1
- package/dst/speechflow-node-t2t-ollama.js +1 -1
- package/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/dst/speechflow-node-t2t-openai.js +1 -1
- package/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +1 -1
- package/dst/speechflow-node-t2t-sentence.js +34 -18
- package/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/dst/speechflow-node-t2t-subtitle.d.ts +0 -1
- package/dst/speechflow-node-t2t-subtitle.js +78 -190
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-t2t-transformers.js +2 -2
- package/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/dst/speechflow-node-x2x-filter.js +4 -4
- package/dst/speechflow-node-x2x-trace.js +6 -13
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +12 -8
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node-xio-file.js +9 -3
- package/dst/speechflow-node-xio-file.js.map +1 -1
- package/dst/speechflow-node-xio-mqtt.js +5 -2
- package/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/dst/speechflow-node-xio-websocket.js +11 -11
- package/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/dst/speechflow-node.d.ts +0 -2
- package/dst/speechflow-node.js +0 -3
- package/dst/speechflow-node.js.map +1 -1
- package/dst/speechflow-utils.d.ts +5 -0
- package/dst/speechflow-utils.js +77 -44
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +101 -82
- package/dst/speechflow.js.map +1 -1
- package/etc/eslint.mjs +1 -2
- package/etc/stx.conf +3 -3
- package/package.json +6 -6
- package/src/speechflow-node-a2a-gender.ts +148 -64
- package/src/speechflow-node-a2a-meter.ts +87 -40
- package/src/speechflow-node-a2a-mute.ts +39 -11
- package/src/speechflow-node-a2a-vad.ts +206 -100
- package/src/speechflow-node-a2a-wav.ts +27 -11
- package/src/speechflow-node-a2t-deepgram.ts +139 -43
- package/src/speechflow-node-t2a-elevenlabs.ts +65 -12
- package/src/speechflow-node-t2a-kokoro.ts +11 -4
- package/src/speechflow-node-t2t-deepl.ts +9 -4
- package/src/speechflow-node-t2t-format.ts +2 -2
- package/src/speechflow-node-t2t-ollama.ts +1 -1
- package/src/speechflow-node-t2t-openai.ts +1 -1
- package/src/speechflow-node-t2t-sentence.ts +37 -20
- package/src/speechflow-node-t2t-transformers.ts +4 -3
- package/src/speechflow-node-x2x-filter.ts +4 -4
- package/src/speechflow-node-x2x-trace.ts +1 -1
- package/src/speechflow-node-xio-device.ts +12 -8
- package/src/speechflow-node-xio-file.ts +9 -3
- package/src/speechflow-node-xio-mqtt.ts +5 -2
- package/src/speechflow-node-xio-websocket.ts +12 -12
- package/src/speechflow-utils.ts +78 -44
- package/src/speechflow.ts +114 -35
--- package/src/speechflow-node-a2a-vad.ts (1.3.1)
+++ package/src/speechflow-node-a2a-vad.ts (1.3.2)

@@ -40,6 +40,9 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
     private queueRecv = this.queue.pointerUse("recv")
     private queueVAD = this.queue.pointerUse("vad")
     private queueSend = this.queue.pointerUse("send")
+    private destroyed = false
+    private tailTimer: ReturnType<typeof setTimeout> | null = null
+    private activeEventListeners = new Set<() => void>()

     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
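The three new private fields establish the teardown-guard pattern that recurs throughout this release: asynchronous callbacks first check a destroyed flag, the tail timer handle is stored so it can be cleared, and one-shot event listeners are tracked so close() can remove them. A minimal, self-contained sketch of the pattern (the member names mirror the diff, but the EventEmitter-based queue and the helper methods are stand-ins, not SpeechFlow's actual API):

    import { EventEmitter } from "node:events"

    /* sketch: guard async callbacks, timers and listeners against use-after-close */
    class GuardedNode {
        private destroyed = false
        private tailTimer: ReturnType<typeof setTimeout> | null = null
        private activeEventListeners = new Set<() => void>()
        constructor (private queue: EventEmitter) {}

        awaitWrite (handler: () => void) {
            if (this.destroyed)
                return
            this.queue.once("write", handler)
            this.activeEventListeners.add(handler)  /* remember for cleanup */
        }

        armTailTimer (ms: number, onElapsed: () => void) {
            if (this.tailTimer !== null)
                clearTimeout(this.tailTimer)        /* re-arm, never stack timers */
            this.tailTimer = setTimeout(() => {
                this.tailTimer = null
                if (!this.destroyed)
                    onElapsed()                     /* fire only while alive */
            }, ms)
        }

        close () {
            this.destroyed = true                   /* silence late callbacks */
            if (this.tailTimer !== null) {
                clearTimeout(this.tailTimer)
                this.tailTimer = null
            }
            for (const listener of this.activeEventListeners)
                this.queue.removeListener("write", listener)
            this.activeEventListeners.clear()
        }
    }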
@@ -67,8 +70,8 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
             throw new Error("VAD node currently supports PCM-S16LE audio only")

-        /*
-        … (deleted line not rendered)
+        /* clear destruction flag */
+        this.destroyed = false

         /* internal processing constants */
         const vadSampleRateTarget = 16000 /* internal target of VAD */
@@ -76,75 +79,101 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {

         /* establish Voice Activity Detection (VAD) facility */
         let tail = false
-        … (14 deleted lines not rendered)
-                    if (tailTimer !== null) {
-                        clearTimeout(tailTimer)
-                        tailTimer = null
-                    }
-                }
-            },
-            onSpeechEnd: (audio) => {
-                const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
-                log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
-                if (this.params.mode === "unlugged") {
-                    tail = true
-                    if (tailTimer !== null)
-                        clearTimeout(tailTimer)
-                    tailTimer = setTimeout(() => {
+        try {
+            this.vad = await RealTimeVAD.new({
+                model: "v5",
+                sampleRate: this.config.audioSampleRate, /* before resampling to 16KHz */
+                frameSamples: vadSamplesPerFrame, /* after resampling to 16KHz */
+                positiveSpeechThreshold: this.params.posSpeechThreshold,
+                negativeSpeechThreshold: this.params.negSpeechThreshold,
+                minSpeechFrames: this.params.minSpeechFrames,
+                redemptionFrames: this.params.redemptionFrames,
+                preSpeechPadFrames: this.params.preSpeechPadFrames,
+                onSpeechStart: () => {
+                    if (this.destroyed)
+                        return
+                    this.log("info", "VAD: speech start")
+                    if (this.params.mode === "unplugged") {
                         tail = false
-                        tailTimer = null
-        … (15 deleted lines not rendered)
-            onFrameProcessed: (audio) => {
-                /* annotate the current audio segment */
-                const element = this.queueVAD.peek()
-                if (element === undefined || element.type !== "audio-frame")
-                    throw new Error("internal error which cannot happen: no more queued element")
-                const segment = element.segmentData[element.segmentIdx++]
-                segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
-
-                /* annotate the entire audio chunk */
-                if (element.segmentIdx >= element.segmentData.length) {
-                    let isSpeech = false
-                    for (const segment of element.segmentData) {
-                        if (segment.isSpeech) {
-                            isSpeech = true
-                            break
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
+                        }
+                    }
+                },
+                onSpeechEnd: (audio) => {
+                    if (this.destroyed)
+                        return
+                    const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
+                    this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
+                    if (this.params.mode === "unplugged") {
+                        tail = true
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
                         }
+                        this.tailTimer = setTimeout(() => {
+                            if (this.destroyed || this.tailTimer === null)
+                                return
+                            tail = false
+                            this.tailTimer = null
+                        }, this.params.postSpeechTail)
+                    }
+                },
+                onVADMisfire: () => {
+                    if (this.destroyed) return
+                    this.log("info", "VAD: speech end (segment too short)")
+                    if (this.params.mode === "unplugged") {
+                        tail = true
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
+                        }
+                        this.tailTimer = setTimeout(() => {
+                            if (this.destroyed || this.tailTimer === null)
+                                return
+                            tail = false
+                            this.tailTimer = null
+                        }, this.params.postSpeechTail)
+                    }
+                },
+                onFrameProcessed: (audio) => {
+                    if (this.destroyed)
+                        return
+                    try {
+                        /* annotate the current audio segment */
+                        const element = this.queueVAD.peek()
+                        if (element === undefined || element.type !== "audio-frame")
+                            throw new Error("internal error which cannot happen: no more queued element")
+                        if (element.segmentIdx >= element.segmentData.length)
+                            throw new Error("segment index out of bounds")
+                        const segment = element.segmentData[element.segmentIdx++]
+                        segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
+
+                        /* annotate the entire audio chunk */
+                        if (element.segmentIdx >= element.segmentData.length) {
+                            let isSpeech = false
+                            for (const segment of element.segmentData) {
+                                if (segment.isSpeech) {
+                                    isSpeech = true
+                                    break
+                                }
+                            }
+                            element.isSpeech = isSpeech
+                            this.queueVAD.touch()
+                            this.queueVAD.walk(+1)
+                        }
+                    }
+                    catch (error) {
+                        this.log("error", `VAD frame processing error: ${error}`)
                     }
-                element.isSpeech = isSpeech
-                this.queueVAD.touch()
-                this.queueVAD.walk(+1)
                 }
-        }
-        … (2 deleted lines not rendered)
+            })
+            this.vad.start()
+        }
+        catch (error) {
+            throw new Error(`failed to initialize VAD: ${error}`)
+        }

         /* provide Duplex stream and internally attach to VAD */
         const self = this
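In unplugged mode the node keeps marking audio as speech for a short hang-over (the postSpeechTail) after each speech end, so trailing syllables are not clipped; onSpeechStart cancels the hang-over because real speech has resumed. The tail/tailTimer pair in isolation (a standalone sketch with hypothetical names):

    /* sketch: post-speech "tail" hang-over — after speech ends, keep
       treating audio as speech for tailMs, then stop */
    class SpeechTail {
        private tail = false
        private timer: ReturnType<typeof setTimeout> | null = null
        constructor (private tailMs: number) {}

        onSpeechEnd () {
            this.tail = true
            if (this.timer !== null)
                clearTimeout(this.timer)  /* re-arm on every speech end */
            this.timer = setTimeout(() => {
                this.tail = false         /* hang-over elapsed */
                this.timer = null
            }, this.tailMs)
        }

        onSpeechStart () {
            this.tail = false             /* real speech resumed, no tail needed */
            if (this.timer !== null) {
                clearTimeout(this.timer)
                this.timer = null
            }
        }

        isSpeech (vadSaysSpeech: boolean): boolean {
            return vadSaysSpeech || this.tail
        }
    }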
@@ -156,47 +185,70 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {

             /* receive audio chunk (writable side of stream) */
             write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
                 if (!Buffer.isBuffer(chunk.payload))
                     callback(new Error("expected audio input as Buffer chunks"))
                 else if (chunk.payload.byteLength === 0)
                     callback()
                 else {
-                    … (4 deleted lines not rendered)
-                    const segmentData: AudioQueueElementSegment[] = []
-                    const chunkSize = vadSamplesPerFrame * (self.config.audioSampleRate / vadSampleRateTarget)
-                    const chunks = Math.trunc(data.length / chunkSize)
-                    for (let i = 0; i < chunks; i++) {
-                        const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
-                        const segment: AudioQueueElementSegment = { data: frame }
-                        segmentData.push(segment)
-                    }
-                    if ((chunks * chunkSize) < data.length) {
-                        const frame = new Float32Array(chunkSize)
-                        frame.fill(0)
-                        frame.set(data.slice(chunks * chunkSize, data.length))
-                        const segment: AudioQueueElementSegment = { data: frame }
-                        segmentData.push(segment)
-                    }
+                    try {
+                        /* convert audio samples from PCM/I16 to PCM/F32 */
+                        const data = utils.convertBufToF32(chunk.payload,
+                            self.config.audioLittleEndian)

-                    … (5 deleted lines not rendered)
+                        /* segment audio samples as individual VAD-sized frames */
+                        const segmentData: AudioQueueElementSegment[] = []
+                        const chunkSize = vadSamplesPerFrame *
+                            (self.config.audioSampleRate / vadSampleRateTarget)
+                        const chunks = Math.trunc(data.length / chunkSize)
+                        for (let i = 0; i < chunks; i++) {
+                            const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
+                            const segment: AudioQueueElementSegment = { data: frame }
+                            segmentData.push(segment)
+                        }
+                        if ((chunks * chunkSize) < data.length) {
+                            const frame = new Float32Array(chunkSize)
+                            frame.fill(0)
+                            frame.set(data.slice(chunks * chunkSize, data.length))
+                            const segment: AudioQueueElementSegment = { data: frame }
+                            segmentData.push(segment)
+                        }

-                    … (3 deleted lines not rendered)
+                        /* queue the results */
+                        self.queueRecv.append({
+                            type: "audio-frame", chunk,
+                            segmentIdx: 0, segmentData
+                        })

-                    … (deleted line not rendered)
+                        /* push segments through Voice Activity Detection (VAD) */
+                        if (self.vad && !self.destroyed) {
+                            try {
+                                for (const segment of segmentData)
+                                    self.vad.processAudio(segment.data)
+                            }
+                            catch (error) {
+                                self.log("error", `VAD processAudio error: ${error}`)
+                            }
+                        }
+
+                        callback()
+                    }
+                    catch (error) {
+                        callback(error instanceof Error ? error : new Error("VAD processing failed"))
+                    }
                 }
             },

             /* receive no more audio chunks (writable side of stream) */
             final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+
                 /* signal end of file */
                 self.queueRecv.append({ type: "audio-eof" })
                 callback()
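The rewritten write path still performs the same three steps, now inside a try/catch: convert PCM-S16LE to Float32, cut the samples into VAD-sized frames (zero-padding the last partial frame), then queue the frames and feed them to the VAD. The frame-cutting step in isolation (standalone sketch, hypothetical function name):

    /* sketch: split samples into fixed-size frames, zero-padding the
       last frame when the input is not an exact multiple */
    function segmentFrames (data: Float32Array, frameSize: number): Float32Array[] {
        const frames: Float32Array[] = []
        const whole = Math.trunc(data.length / frameSize)
        for (let i = 0; i < whole; i++)
            frames.push(data.slice(i * frameSize, (i + 1) * frameSize))
        if (whole * frameSize < data.length) {
            const last = new Float32Array(frameSize) /* zero-filled by default */
            last.set(data.slice(whole * frameSize))
            frames.push(last)
        }
        return frames
    }

For example, 1000 samples with frameSize 480 yield three frames: two full frames of 480 samples and a final frame of 40 samples followed by 440 zeros.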
@@ -204,12 +256,26 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {

             /* send audio chunk(s) (readable side of stream) */
             read (_size) {
+                if (self.destroyed) {
+                    this.push(null)
+                    return
+                }
+
                 /* try to perform read operation from scratch */
                 const tryToRead = () => {
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+
                     /* flush pending audio chunks */
                     const flushPendingChunks = () => {
                         let pushed = 0
                         while (true) {
+                            if (self.destroyed) {
+                                this.push(null)
+                                return
+                            }
                             const element = self.queueSend.peek()
                             if (element === undefined)
                                 break
@@ -233,23 +299,33 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                                 this.push(chunk)
                                 pushed++
                             }
-                            else if (self.params.mode === "unplugged" && pushed === 0)
+                            else if (self.params.mode === "unplugged" && pushed === 0) {
                                 /* we have to await chunks now, as in unplugged
                                    mode we else would be never called again until
                                    we at least once push a new chunk as the result */
-
+                                setTimeout(() => {
+                                    if (self.destroyed)
+                                        return
+                                    tryToRead()
+                                }, 0)
+                                return
+                            }
                         }
                     }

                     /* await forthcoming audio chunks */
                     const awaitForthcomingChunks = () => {
+                        if (self.destroyed)
+                            return
                         const element = self.queueSend.peek()
                         if (element !== undefined
                             && element.type === "audio-frame"
                             && element.isSpeech !== undefined)
                             flushPendingChunks()
-                        else
+                        else if (!self.destroyed) {
                             self.queue.once("write", awaitForthcomingChunks)
+                            self.activeEventListeners.add(awaitForthcomingChunks)
+                        }
                     }

                     const element = self.queueSend.peek()
@@ -259,8 +335,10 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                         && element.type === "audio-frame"
                         && element.isSpeech !== undefined)
                         flushPendingChunks()
-                    else
+                    else if (!self.destroyed) {
                         self.queue.once("write", awaitForthcomingChunks)
+                        self.activeEventListeners.add(awaitForthcomingChunks)
+                    }
                 }
                 tryToRead()
             }
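When unplugged mode suppresses every pending chunk, the readable side now re-polls on the next macrotask instead of parking forever; combined with the tracked "write" listeners above, nothing outlives close(). The deferral pattern in isolation (a hypothetical helper, not SpeechFlow API):

    /* hypothetical helper: retry on the next macrotask so the current
       read() invocation can unwind instead of recursing synchronously */
    function schedulePoll (isDestroyed: () => boolean, poll: () => void): void {
        setTimeout(() => {
            if (isDestroyed())
                return /* the node was closed while the retry was pending */
            poll()
        }, 0)
    }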
@@ -269,15 +347,43 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {

     /* close node */
     async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* cleanup tail timer */
+        if (this.tailTimer !== null) {
+            clearTimeout(this.tailTimer)
+            this.tailTimer = null
+        }
+
+        /* remove all event listeners */
+        this.activeEventListeners.forEach((listener) => {
+            this.queue.removeListener("write", listener)
+        })
+        this.activeEventListeners.clear()
+
         /* close stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null
         }

+        /* cleanup queue pointers before closing VAD to prevent callback access */
+        this.queue.pointerDelete("recv")
+        this.queue.pointerDelete("vad")
+        this.queue.pointerDelete("send")
+
         /* close VAD */
         if (this.vad !== null) {
-
+            try {
+                const flushPromise = this.vad.flush()
+                const timeoutPromise = new Promise((resolve) =>
+                    setTimeout(resolve, 5000))
+                await Promise.race([ flushPromise, timeoutPromise ])
+            }
+            catch (error) {
+                this.log("warning", `VAD flush error during close: ${error}`)
+            }
             this.vad.destroy()
             this.vad = null
         }
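close() now bounds the final VAD flush with a five-second timeout so a hung flush cannot stall shutdown. The same pattern in isolation (a minimal sketch; flush here stands for any async cleanup step):

    /* minimal sketch: bound an async cleanup step with Promise.race */
    async function flushWithTimeout (flush: () => Promise<void>, timeoutMs = 5000): Promise<void> {
        try {
            const timeout = new Promise<void>((resolve) =>
                setTimeout(resolve, timeoutMs))      /* resolves, never rejects */
            await Promise.race([ flush(), timeout ]) /* wait for the faster one */
        }
        catch (error) {
            console.warn(`flush error during close: ${error}`)
        }
    }

Note that Promise.race only stops the waiting: a flush that loses the race keeps running in the background, which is acceptable here because destroy() follows immediately.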
--- package/src/speechflow-node-a2a-wav.ts (1.3.1)
+++ package/src/speechflow-node-a2a-wav.ts (1.3.2)

@@ -103,8 +103,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
             decodeStrings: false,
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
-                if (!Buffer.isBuffer(chunk.payload))
+                if (!Buffer.isBuffer(chunk.payload)) {
                     callback(new Error("invalid chunk payload type"))
+                    return
+                }
                 else if (firstChunk) {
                     if (self.params.mode === "encode") {
                         /* convert raw/PCM to WAV/PCM
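This hunk and several below share one root cause: in Node.js callback-style stream code, callback(new Error(...)) only reports the error, it does not end the function, so without an explicit return execution falls through into the success path. A distilled illustration (hypothetical validation, not SpeechFlow code):

    import { Transform, TransformCallback } from "node:stream"

    const passNonEmpty = new Transform({
        transform (chunk: Buffer, _encoding, callback: TransformCallback) {
            if (chunk.length === 0) {
                callback(new Error("empty chunk"))
                return /* without this, push() below would still run */
            }
            this.push(chunk)
            callback()
        }
    })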
@@ -127,6 +129,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
                     }
                     else if (self.params.mode === "decode") {
                         /* convert WAV/PCM to raw/PCM */
+                        if (chunk.payload.length < 44) {
+                            callback(new Error("WAV header too short, expected at least 44 bytes"))
+                            return
+                        }
                         const header = readWavHeader(chunk.payload)
                         self.log("info", "WAV audio stream: " +
                             `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
@@ -134,20 +140,30 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
                             `channels=${header.channels} ` +
                             `sampleRate=${header.sampleRate} ` +
                             `bitDepth=${header.bitDepth}`)
-                        if (header.audioFormat !== 0x0001 /* PCM */)
-                        … (3 deleted lines not rendered)
-                        if (header.…
-                        … (3 deleted lines not rendered)
+                        if (header.audioFormat !== 0x0001 /* PCM */) {
+                            callback(new Error("WAV not based on PCM format"))
+                            return
+                        }
+                        if (header.bitDepth !== self.config.audioBitDepth) {
+                            callback(new Error(`WAV not based on ${self.config.audioBitDepth} bit samples`))
+                            return
+                        }
+                        if (header.sampleRate !== self.config.audioSampleRate) {
+                            callback(new Error(`WAV not based on ${self.config.audioSampleRate}Hz sample rate`))
+                            return
+                        }
+                        if (header.channels !== self.config.audioChannels) {
+                            callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
+                            return
+                        }
                         chunk.payload = chunk.payload.subarray(44)
                         this.push(chunk)
                         callback()
                     }
-                    else
-                        … (deleted line not rendered)
+                    else {
+                        callback(new Error(`invalid operation mode "${self.params.mode}"`))
+                        return
+                    }
                 }
                 else {
                     /* pass-through original chunk */
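The decode path assumes the canonical 44-byte RIFF/WAVE header before handing the payload to readWavHeader, whose implementation this diff does not show. A minimal reader for exactly the four fields validated above might look like this (an illustrative sketch under that canonical-header assumption, not the package's actual code):

    /* illustrative reader for a canonical 44-byte RIFF/WAVE header */
    function readWavHeaderSketch (buf: Buffer) {
        if (buf.length < 44)
            throw new Error("WAV header too short, expected at least 44 bytes")
        if (buf.toString("ascii", 0, 4) !== "RIFF" || buf.toString("ascii", 8, 12) !== "WAVE")
            throw new Error("not a RIFF/WAVE stream")
        return {
            audioFormat: buf.readUInt16LE(20), /* 0x0001 = PCM */
            channels:    buf.readUInt16LE(22),
            sampleRate:  buf.readUInt32LE(24),
            bitDepth:    buf.readUInt16LE(34)
        }
    }

The fixed subarray(44) offset used above only holds for such canonical writers; WAV files with extra chunks before the data chunk place their samples later in the stream.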