npm - speechflow - Versions diffs - 1.6.7 → 1.7.1 - Mend

speechflow 1.6.7 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (152)

package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts CHANGED Viewed

@@ -76,6 +76,10 @@ class CompressorProcessor extends AudioWorkletProcessor {
         /*  determine number of channels  */
         const nCh = input.length
+        /*  reset envelope array if channel count changed  */
+        if (nCh !== this.env.length)
+            this.env = []
         /*  initially just copy input to output (pass-through)  */
         for (let c = 0; c < output.length; c++) {
             if (!output[c] || !input[c])

package/speechflow-cli/src/speechflow-node-a2a-compressor.ts CHANGED Viewed

@@ -245,8 +245,10 @@ export default class SpeechFlowNodeA2ACompressor extends SpeechFlowNode {
                     /*  compress chunk  */
                     const payload = util.convertBufToI16(chunk.payload)
                     self.compressor?.process(payload).then((result) => {
-                        if (self.closing)
-                            throw new Error("stream already destroyed")
+                        if (self.closing) {
+                            callback(new Error("stream already destroyed"))
+                            return
+                        }
                         if ((self.params.type === "standalone" && self.params.mode === "compress") ||
                             (self.params.type === "sidechain"  && self.params.mode === "adjust")     ) {
                             /*  take over compressed data  */

package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts CHANGED Viewed

@@ -113,7 +113,7 @@ class ExpanderProcessor extends AudioWorkletProcessor {
             const expectedOutLevelDB = levelDB + gainDB + makeupDB
             if (expectedOutLevelDB < floorDB) {
                 const neededLiftDB = floorDB - expectedOutLevelDB
-                gainLin /= util.dB2lin(neededLiftDB)
+                gainLin *= util.dB2lin(neededLiftDB)
             }
             /*  apply gain change to channel  */

package/speechflow-cli/src/speechflow-node-a2a-expander.ts CHANGED Viewed

@@ -168,8 +168,10 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
                     /*  expand chunk  */
                     const payload = util.convertBufToI16(chunk.payload)
                     self.expander?.process(payload).then((result) => {
-                        if (self.closing)
-                            throw new Error("stream already destroyed")
+                        if (self.closing) {
+                            callback(new Error("stream already destroyed"))
+                            return
+                        }
                         /*  take over expanded data  */
                         const payload = util.convertI16ToBuf(result)

package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts CHANGED Viewed

@@ -99,7 +99,7 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
         })
         /*  wrap streams with conversions for chunk vs plain audio  */
-        const wrapper1 = util.createTransformStreamForWritableSide()
+        const wrapper1 = util.createTransformStreamForWritableSide("audio", 1)
         const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
         this.stream = Stream.compose(wrapper1, ffmpegStream, wrapper2)
     }
@@ -114,7 +114,9 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
         /*  shutdown FFmpeg  */
         if (this.ffmpeg !== null) {
-            util.run(() => this.ffmpeg!.kill(), () => {})
+            util.run("stopping FFmpeg process",
+                () => this.ffmpeg!.kill(),
+                () => {})
             this.ffmpeg = null
         }
     }

package/speechflow-cli/src/speechflow-node-a2a-filler.ts CHANGED Viewed

@@ -15,6 +15,8 @@ import * as util                           from "./speechflow-util"
 class AudioFiller extends EventEmitter {
     private emittedEndSamples = 0           /* stream position in samples already emitted */
+    private maxInputEndSamples = 0
+    private lastMeta: Map<string, any> | undefined = undefined
     private readonly bytesPerSample = 2     /* PCM I16 */
     private readonly bytesPerFrame: number
     private readonly sampleTolerance = 0.5  /* tolerance for floating-point sample comparisons */
@@ -25,12 +27,12 @@ class AudioFiller extends EventEmitter {
     }
     /*  optional helper to allow subscribing with strong typing  */
-    public on(event: "chunk", listener: (chunk: SpeechFlowChunk) => void): this
+    public on(event: "chunk", listener: (chunk: SpeechFlowChunk, type: string) => void): this
     public on(event: string, listener: (...args: any[]) => void): this {
         return super.on(event, listener)
     }
-    /*  convert fractional samples to duration  */
+    /*  convert fractional samples from duration  */
     private samplesFromDuration(duration: Duration): number {
         const seconds = duration.as("seconds")
         const samples = seconds * this.sampleRate
@@ -51,8 +53,9 @@ class AudioFiller extends EventEmitter {
         const payload = Buffer.alloc(frames * this.bytesPerFrame) /* already zeroed */
         const timestampStart = this.durationFromSamples(fromSamples)
         const timestampEnd   = this.durationFromSamples(toSamples)
-        const chunk = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload, meta ? new Map(meta) : undefined)
-        this.emit("chunk", chunk)
+        const chunk = new SpeechFlowChunk(timestampStart, timestampEnd,
+            "final", "audio", payload, meta ? new Map(meta) : undefined)
+        this.emit("chunk", chunk, "silence")
     }
     /*  add a chunk of audio for processing  */
@@ -62,6 +65,12 @@ class AudioFiller extends EventEmitter {
         if (endSamp < startSamp)
             throw new Error("invalid timestamps")
+        /*  track maximum input end timestamp and last metadata for trailing silence  */
+        if (endSamp > this.maxInputEndSamples) {
+            this.maxInputEndSamples = endSamp
+            this.lastMeta = chunk.meta ? new Map(chunk.meta) : undefined
+        }
         /*  if chunk starts beyond what we've emitted, insert silence for the gap  */
         if (startSamp > this.emittedEndSamples + this.sampleTolerance) {
             this.emitSilence(this.emittedEndSamples, startSamp, chunk.meta)
@@ -95,12 +104,20 @@ class AudioFiller extends EventEmitter {
         const outEndSamples   = outStartSamples + Math.floor(payload.length / this.bytesPerFrame)
         const timestampStart  = this.durationFromSamples(outStartSamples)
         const timestampEnd    = this.durationFromSamples(outEndSamples)
-        const c = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload, new Map(chunk.meta))
-        this.emit("chunk", c)
+        const c = new SpeechFlowChunk(timestampStart, timestampEnd,
+            "final", "audio", payload, new Map(chunk.meta))
+        this.emit("chunk", c, "content")
         /*  advance emitted cursor  */
         this.emittedEndSamples = Math.max(this.emittedEndSamples, outEndSamples)
     }
+    /*  signal end of processing and emit trailing silence  */
+    public done (): void {
+        /*  emit trailing silence if there's a gap between emitted and max input  */
+        if (this.maxInputEndSamples > this.emittedEndSamples + this.sampleTolerance)
+            this.emitSilence(this.emittedEndSamples, this.maxInputEndSamples, this.lastMeta)
+    }
 }
 /*  SpeechFlow node for filling audio gaps  */
@@ -137,12 +154,13 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
         this.sendQueue = new util.AsyncQueue<SpeechFlowChunk | null>()
         /*  shift chunks from filler to send queue  */
-        this.filler.on("chunk", (chunk) => {
+        this.filler.on("chunk", (chunk, type) => {
             this.sendQueue?.write(chunk)
         })
         /*  establish a duplex stream  */
         const self = this
+        const reads = new util.PromiseSet<void>()
         this.stream = new Stream.Duplex({
             readableObjectMode: true,
             writableObjectMode: true,
@@ -154,8 +172,6 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
                     callback(new Error("invalid chunk payload type"))
                 else {
                     try {
-                        if (self.closing || self.filler === null)
-                            throw new Error("stream already destroyed")
                         self.filler.add(chunk)
                         callback()
                     }
@@ -164,12 +180,37 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
                     }
                 }
             },
+            async final (callback) {
+                /*  short-circuit processing in case of own closing  */
+                if (self.closing) {
+                    callback()
+                    return
+                }
+                /*  signal end of stream  */
+                if (self.filler !== null && self.sendQueue !== null) {
+                    /*  optionally emit trailing silence
+                        (we have to wait for its internal "emit" operation to happen)  */
+                    self.filler.done()
+                    await util.sleep(10)
+                    /*  signal end of stream  */
+                    self.sendQueue.write(null)
+                }
+                /*  await all read operations  */
+                await reads.awaitAll()
+                /*  signal end of streaming  */
+                this.push(null)
+                callback()
+            },
             read (size) {
                 if (self.closing || self.sendQueue === null) {
                     this.push(null)
                     return
                 }
-                self.sendQueue.read().then((chunk) => {
+                reads.add(self.sendQueue.read().then((chunk) => {
                     if (self.closing || self.sendQueue === null) {
                         this.push(null)
                         return
@@ -178,22 +219,18 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
                         self.log("info", "received EOF signal")
                         this.push(null)
                     }
+                    else if (!(chunk.payload instanceof Buffer)) {
+                        self.log("warning", "invalid chunk (expected audio buffer)")
+                        this.push(null)
+                    }
                     else {
-                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
+                        self.log("debug", `received data (${chunk.payload.byteLength} bytes)`)
                         this.push(chunk)
                     }
                 }).catch((error: unknown) => {
                     if (!self.closing && self.sendQueue !== null)
                         self.log("error", `queue read error: ${util.ensureError(error).message}`)
-                })
-            },
-            final (callback) {
-                if (self.closing) {
-                    callback()
-                    return
-                }
-                this.push(null)
-                callback()
+                }))
             }
         })
     }

package/speechflow-cli/src/speechflow-node-a2a-gain.ts CHANGED Viewed

@@ -73,11 +73,6 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
                 }
             },
             final (callback) {
-                if (self.closing) {
-                    callback()
-                    return
-                }
-                this.push(null)
                 callback()
             }
         })

package/speechflow-cli/src/speechflow-node-a2a-gender.ts CHANGED Viewed

@@ -107,7 +107,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
             })
             this.classifier = await Promise.race([
                 pipelinePromise,
-                util.timeoutPromise(30 * 1000, "model initialization timeout")
+                util.timeout(30 * 1000, "model initialization timeout")
             ]) as Transformers.AudioClassificationPipeline
         }
         catch (error) {
@@ -149,7 +149,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
             /*  classify audio  */
             const result = await Promise.race([
                 this.classifier(data),
-                util.timeoutPromise(30 * 1000, "classification timeout")
+                util.timeout(30 * 1000, "classification timeout")
             ]) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
             const classified = Array.isArray(result) ?
                 result as Transformers.AudioClassificationOutput :
@@ -363,8 +363,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
         if (this.classifier !== null) {
             try {
                 const disposePromise = this.classifier.dispose()
-                const timeoutPromise = new Promise((resolve) => setTimeout(resolve, 5000))
-                await Promise.race([ disposePromise, timeoutPromise ])
+                await Promise.race([ disposePromise, util.sleep(5000) ])
             }
             catch (error) {
                 this.log("warning", `error during classifier cleanup: ${error}`)

package/speechflow-cli/src/speechflow-node-a2a-mute.ts CHANGED Viewed

@@ -107,11 +107,6 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
                 }
             },
             final (callback) {
-                if (self.closing) {
-                    callback()
-                    return
-                }
-                this.push(null)
                 callback()
             }
         })

package/speechflow-cli/src/speechflow-node-a2a-pitch.ts CHANGED Viewed

@@ -185,8 +185,7 @@ export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
                         this.push(chunk)
                         callback()
                     }).catch((error: unknown) => {
-                        if (!self.closing)
-                            callback(util.ensureError(error, "pitch shifting failed"))
+                        callback(util.ensureError(error, "pitch shifting failed"))
                     })
                 }
             },

package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts CHANGED Viewed

@@ -136,11 +136,6 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
                 }
             },
             final (callback) {
-                if (self.closing) {
-                    callback()
-                    return
-                }
-                this.push(null)
                 callback()
             }
         })

package/speechflow-cli/src/speechflow-node-a2a-speex.ts CHANGED Viewed

@@ -108,11 +108,6 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
                 }
             },
             final (callback) {
-                if (self.closing) {
-                    callback()
-                    return
-                }
-                this.push(null)
                 callback()
             }
         })

package/speechflow-cli/src/speechflow-node-a2a-wav.ts CHANGED Viewed

@@ -141,10 +141,17 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
                             callback(new Error("WAV header too short, expected at least 44 bytes"))
                             return
                         }
-                        const header = readWavHeader(chunk.payload)
+                        let header: ReturnType<typeof readWavHeader>
+                        try {
+                            header = readWavHeader(chunk.payload)
+                        }
+                        catch (error) {
+                            callback(util.ensureError(error, "WAV header parsing failed"))
+                            return
+                        }
                         self.log("info", "WAV audio stream: " +
                             `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
-                                "0x" + (header.audioFormat as number).toString(16).padStart(4, "0")} ` +
+                                "0x" + header.audioFormat.toString(16).padStart(4, "0")} ` +
                             `channels=${header.channels} ` +
                             `sampleRate=${header.sampleRate} ` +
                             `bitDepth=${header.bitDepth}`)
@@ -181,7 +188,6 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
                 }
             },
             final (callback) {
-                this.push(null)
                 callback()
             }
         })

package/speechflow-cli/src/speechflow-node-a2t-amazon.ts CHANGED Viewed

@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
     public static name = "a2t-amazon"
     /*  internal state  */
-    private client:            TranscribeStreamingClient     | null             = null
-    private clientStream:      AsyncIterable<TranscriptResultStream> | null     = null
-    private closing                                                           = false
-    private initTimeout:       ReturnType<typeof setTimeout> | null             = null
-    private connectionTimeout: ReturnType<typeof setTimeout> | null             = null
+    private client:            TranscribeStreamingClient                | null = null
+    private clientStream:      AsyncIterable<TranscriptResultStream>    | null = null
+    private closing                                                            = false
+    private connectionTimeout: ReturnType<typeof setTimeout>            | null = null
     private queue:             util.SingleQueue<SpeechFlowChunk | null> | null = null
     /*  construct node  */
@@ -194,16 +193,17 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                         this.queue?.write(chunk)
                     }
                 }
-            })().catch((err: Error) => {
-                this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${err}`)
+            })().catch((err: unknown) => {
+                this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${util.ensureError(err).message}`)
             })
         }
         /*  remember opening time to receive time zero offset  */
         this.timeOpen = DateTime.now()
-        /*  provide Duplex stream and internally attach to Deepgram API  */
+        /*  provide Duplex stream and internally attach to Amazon Transcribe API  */
         const self = this
+        const reads = new util.PromiseSet<void>()
         this.stream = new Stream.Duplex({
             writableObjectMode: true,
             readableObjectMode: true,
@@ -231,12 +231,29 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                     callback()
                 }
             },
+            async final (callback) {
+                if (self.closing || self.client === null) {
+                    callback()
+                    return
+                }
+                /*  await all read operations  */
+                await reads.awaitAll()
+                util.run(
+                    () => self.client!.destroy(),
+                    (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
+                )
+                audioQueue.push(null) /*  do not push null to stream, let Amazon Transcribe do it  */
+                audioQueue.destroy()
+                callback()
+            },
             read (size) {
                 if (self.closing || self.queue === null) {
                     this.push(null)
                     return
                 }
-                self.queue.read().then((chunk) => {
+                reads.add(self.queue.read().then((chunk) => {
                     if (self.closing || self.queue === null) {
                         this.push(null)
                         return
@@ -252,20 +269,7 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                 }).catch((error: unknown) => {
                     if (!self.closing && self.queue !== null)
                         self.log("error", `queue read error: ${util.ensureError(error).message}`)
-                })
-            },
-            final (callback) {
-                if (self.closing || self.client === null) {
-                    callback()
-                    return
-                }
-                util.run(
-                    () => self.client!.destroy(),
-                    (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
-                )
-                audioQueue.push(null) /*  do not push null to stream, let Amazon Transcribe do it  */
-                audioQueue.destroy()
-                callback()
+                }))
             }
         })
     }
@@ -276,10 +280,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
         this.closing = true
         /*  cleanup all timers  */
-        if (this.initTimeout !== null) {
-            clearTimeout(this.initTimeout)
-            this.initTimeout = null
-        }
         if (this.connectionTimeout !== null) {
             clearTimeout(this.connectionTimeout)
             this.connectionTimeout = null

package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts CHANGED Viewed

@@ -21,10 +21,9 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
     public static name = "a2t-deepgram"
     /*  internal state  */
-    private dg:                Deepgram.LiveClient | null                       = null
-    private closing                                                           = false
-    private initTimeout:       ReturnType<typeof setTimeout> | null             = null
-    private connectionTimeout: ReturnType<typeof setTimeout> | null             = null
+    private dg:                Deepgram.LiveClient                      | null = null
+    private closing                                                            = false
+    private connectionTimeout: ReturnType<typeof setTimeout>            | null = null
     private queue:             util.SingleQueue<SpeechFlowChunk | null> | null = null
     /*  construct node  */
@@ -41,6 +40,10 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
             interim:  { type: "boolean", val: false,    pos: 3 }
         })
+        /*  sanity check parameters  */
+        if (!this.params.key)
+            throw new Error("Deepgram API key not configured")
         /*  declare node input/output format  */
         this.input  = "audio"
         this.output = "text"
@@ -126,7 +129,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                 this.log("info", `text received (start: ${data.start}s, ` +
                     `duration: ${data.duration.toFixed(2)}s, ` +
                     `kind: ${isFinal ? "final" : "intermediate"}): ` +
-                    `${text}"`)
+                    `"${text}"`)
                 const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
                 const end   = start.plus({ seconds: data.duration })
                 const metas = metastore.fetch(start, end)
@@ -163,14 +166,16 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
             this.log("error", `error: ${error.message}`)
             if (!this.closing && this.queue !== null)
                 this.queue.write(null)
-            this.emit("error")
+            this.emit("error", error)
         })
         /*  wait for Deepgram API to be available  */
         await new Promise((resolve, reject) => {
             this.connectionTimeout = setTimeout(() => {
-                this.connectionTimeout = null
-                reject(new Error("Deepgram: timeout waiting for connection open"))
+                if (this.connectionTimeout !== null) {
+                    this.connectionTimeout = null
+                    reject(new Error("Deepgram: timeout waiting for connection open"))
+                }
             }, 8000)
             this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
                 this.log("info", "connection open")
@@ -187,6 +192,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
         /*  provide Duplex stream and internally attach to Deepgram API  */
         const self = this
+        const reads = new util.PromiseSet<void>()
         this.stream = new Stream.Duplex({
             writableObjectMode: true,
             readableObjectMode: true,
@@ -217,12 +223,33 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                     callback()
                 }
             },
+            async final (callback) {
+                /*  short-circuiting in case of own closing  */
+                if (self.closing || self.dg === null) {
+                    callback()
+                    return
+                }
+                /*  close Deepgram API  */
+                try {
+                    self.dg.requestClose()
+                }
+                catch (error) {
+                    self.log("warning", `error closing Deepgram connection: ${error}`)
+                }
+                /*  await all read operations  */
+                await reads.awaitAll()
+                /*  NOTICE: do not push null here -- let the Deepgram close event handle it  */
+                callback()
+            },
             read (size) {
                 if (self.closing || self.queue === null) {
                     this.push(null)
                     return
                 }
-                self.queue.read().then((chunk) => {
+                reads.add(self.queue.read().then((chunk) => {
                     if (self.closing || self.queue === null) {
                         this.push(null)
                         return
@@ -238,21 +265,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
                 }).catch((error: unknown) => {
                     if (!self.closing && self.queue !== null)
                         self.log("error", `queue read error: ${util.ensureError(error).message}`)
-                })
-            },
-            final (callback) {
-                if (self.closing || self.dg === null) {
-                    callback()
-                    return
-                }
-                try {
-                    self.dg.requestClose()
-                }
-                catch (error) {
-                    self.log("warning", `error closing Deepgram connection: ${error}`)
-                }
-                /*  NOTICE: do not push null here -- let the Deepgram close event handle it  */
-                callback()
+                }))
             }
         })
     }
@@ -263,10 +276,6 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
         this.closing = true
         /*  cleanup all timers  */
-        if (this.initTimeout !== null) {
-            clearTimeout(this.initTimeout)
-            this.initTimeout = null
-        }
         if (this.connectionTimeout !== null) {
             clearTimeout(this.connectionTimeout)
             this.connectionTimeout = null