npm - speechflow - Versions diffs - 2.0.0 → 2.0.2 - Mend

speechflow 2.0.0 → 2.0.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (150)

package/speechflow-cli/src/speechflow-node-a2a-vad.ts CHANGED Viewed

@@ -85,6 +85,18 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
             }
         }
+        /*  helper function for tail timer handling  */
+        const startTailTimer = () => {
+            tail = true
+            clearTailTimer()
+            this.tailTimer = setTimeout(() => {
+                if (this.closing || this.tailTimer === null)
+                    return
+                tail = false
+                this.tailTimer = null
+            }, this.params.postSpeechTail)
+        }
         /*  establish Voice Activity Detection (VAD) facility  */
         let tail = false
         try {
@@ -111,31 +123,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         return
                     const duration = util.audioArrayDuration(audio, vadSampleRateTarget)
                     this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
-                    if (this.params.mode === "unplugged") {
-                        tail = true
-                        clearTailTimer()
-                        this.tailTimer = setTimeout(() => {
-                            if (this.closing || this.tailTimer === null)
-                                return
-                            tail = false
-                            this.tailTimer = null
-                        }, this.params.postSpeechTail)
-                    }
+                    if (this.params.mode === "unplugged")
+                        startTailTimer()
                 },
                 onVADMisfire: () => {
                     if (this.closing)
                         return
                     this.log("info", "VAD: speech end (segment too short)")
-                    if (this.params.mode === "unplugged") {
-                        tail = true
-                        clearTailTimer()
-                        this.tailTimer = setTimeout(() => {
-                            if (this.closing || this.tailTimer === null)
-                                return
-                            tail = false
-                            this.tailTimer = null
-                        }, this.params.postSpeechTail)
-                    }
+                    if (this.params.mode === "unplugged")
+                        startTailTimer()
                 },
                 onFrameProcessed: (audio) => {
                     if (this.closing)
@@ -144,7 +140,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         /*  annotate the current audio segment  */
                         const element = this.queueVAD.peek()
                         if (element === undefined || element.type !== "audio-frame")
-                            throw new Error("internal error which cannot happen: no more queued element")
+                            throw new Error("internal error that cannot happen: no more queued element")
                         if (element.segmentIdx >= element.segmentData.length)
                             throw new Error("segment index out of bounds")
                         const segment = element.segmentData[element.segmentIdx++]
@@ -227,6 +223,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                             }
                         }
+                        /*  signal completion  */
                         callback()
                     }
                     catch (error) {
@@ -322,6 +319,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         }
                     }
+                    /*  peek at send queue element  */
                     const element = self.queueSend.peek()
                     if (element !== undefined && element.type === "audio-eof")
                         this.push(null)
@@ -371,8 +369,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
         if (this.vad !== null) {
             try {
                 const flushPromise = this.vad.flush()
-                const timeoutPromise = new Promise((resolve) =>
-                    setTimeout(resolve, 5000))
+                const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
                 await Promise.race([ flushPromise, timeoutPromise ])
             }
             catch (error) {

package/speechflow-cli/src/speechflow-node-a2a-wav.ts CHANGED Viewed

@@ -21,15 +21,18 @@ const writeWavHeader = (
     const sampleRate   = options?.sampleRate  ?? 44100 /* 44KHz */
     const bitDepth     = options?.bitDepth    ?? 16    /* 16-Bit */
+    /*  determine header dimensions  */
     const headerLength = 44
     const maxDataSize  = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
     const dataLength   = length ?? maxDataSize
     const fileSize     = dataLength + headerLength
     const header       = Buffer.alloc(headerLength)
+    /*  calculate byte rate and block alignment  */
     const byteRate     = (sampleRate * channels * bitDepth) / 8
     const blockAlign   = (channels * bitDepth) / 8
+    /*  write header fields  */
     let offset = 0
     header.write("RIFF", offset);               offset += 4
     header.writeUInt32LE(fileSize - 8, offset); offset += 4
@@ -45,6 +48,7 @@ const writeWavHeader = (
     header.write("data", offset);               offset += 4
     header.writeUInt32LE(dataLength, offset);   offset += 4
+    /*  return completed header  */
     return header
 }
@@ -53,6 +57,7 @@ const readWavHeader = (buffer: Buffer) => {
     if (buffer.length < 44)
         throw new Error("WAV header too short, expected at least 44 bytes")
+    /*  read header fields  */
     let offset = 0
     const riffHead     = buffer.subarray(offset, offset + 4).toString(); offset += 4
     const fileSize     = buffer.readUInt32LE(offset);                    offset += 4
@@ -68,6 +73,7 @@ const readWavHeader = (buffer: Buffer) => {
     const data         = buffer.subarray(offset, offset + 4).toString(); offset += 4
     const dataLength   = buffer.readUInt32LE(offset);                    offset += 4
+    /*  validate RIFF header  */
     if (riffHead !== "RIFF")
         throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
     if (waveHead !== "WAVE")
@@ -77,6 +83,7 @@ const readWavHeader = (buffer: Buffer) => {
     if (data !== "data")
         throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
+    /*  return parsed header data  */
     return {
         riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
         channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength

package/speechflow-cli/src/speechflow-node-a2t-amazon.ts CHANGED Viewed

@@ -53,7 +53,7 @@ class AsyncQueue<T> {
                 continue
             }
             else {
-                const it = await new Promise<IteratorResult<T>>((resolve) => this.resolvers.push(resolve))
+                const it = await new Promise<IteratorResult<T>>((resolve) => { this.resolvers.push(resolve) })
                 if (it.done)
                     return
                 yield it.value
@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
     public static name = "a2t-amazon"
     /*  internal state  */
-    private client:            TranscribeStreamingClient                | null = null
-    private clientStream:      AsyncIterable<TranscriptResultStream>    | null = null
-    private closing                                                            = false
-    private connectionTimeout: ReturnType<typeof setTimeout>            | null = null
-    private queue:             util.SingleQueue<SpeechFlowChunk | null> | null = null
+    private client:       TranscribeStreamingClient                | null = null
+    private clientStream: AsyncIterable<TranscriptResultStream>    | null = null
+    private closing                                                       = false
+    private queue:        util.SingleQueue<SpeechFlowChunk | null> | null = null
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -126,8 +125,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                 secretAccessKey: this.params.secKey
             }
         })
-        if (this.client === null)
-            throw new Error("failed to establish Amazon Transcribe client")
         /*  create an AudioStream for Amazon Transcribe  */
         const audioQueue = new AsyncQueue<Uint8Array>()
@@ -236,11 +233,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                     callback()
                     return
                 }
-                /*  await all read operations  */
                 await reads.awaitAll()
-                util.run(
+                util.run("closing Amazon Transcribe connection",
                     () => self.client!.destroy(),
                     (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
                 )
@@ -279,12 +273,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
         /*  indicate closing first to stop all async operations  */
         this.closing = true
-        /*  cleanup all timers  */
-        if (this.connectionTimeout !== null) {
-            clearTimeout(this.connectionTimeout)
-            this.connectionTimeout = null
-        }
         /*  close queue  */
         if (this.queue !== null) {
             this.queue.write(null)

package/speechflow-cli/src/speechflow-node-a2t-google.ts CHANGED Viewed

@@ -22,11 +22,10 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
     public static name = "a2t-google"
     /*  internal state  */
-    private client:            GoogleSpeech.SpeechClient                                   | null = null
-    private recognizeStream:   ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
-    private connectionTimeout: ReturnType<typeof setTimeout>                               | null = null
-    private queue:             util.SingleQueue<SpeechFlowChunk | null>                    | null = null
-    private closing                                                                               = false
+    private client:          GoogleSpeech.SpeechClient                                   | null = null
+    private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
+    private queue:           util.SingleQueue<SpeechFlowChunk | null>                    | null = null
+    private closing                                                                             = false
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -280,12 +279,6 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
         /*  indicate closing first to stop all async operations  */
         this.closing = true
-        /*  cleanup all timers  */
-        if (this.connectionTimeout !== null) {
-            clearTimeout(this.connectionTimeout)
-            this.connectionTimeout = null
-        }
         /*  shutdown stream  */
         if (this.stream !== null) {
             await util.destroyStream(this.stream)

package/speechflow-cli/src/speechflow-node-a2t-openai.ts CHANGED Viewed

@@ -23,12 +23,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
     public static name = "a2t-openai"
     /*  internal state  */
-    private openai:            OpenAI | null = null
-    private ws:                ws.WebSocket | null = null
-    private queue:             util.SingleQueue<SpeechFlowChunk | null> | null = null
-    private resampler:         SpeexResampler | null = null
-    private closing            = false
-    private connectionTimeout: ReturnType<typeof setTimeout> | null = null
+    private openai:            OpenAI                                    | null = null
+    private ws:                ws.WebSocket                              | null = null
+    private queue:             util.SingleQueue<SpeechFlowChunk | null>  | null = null
+    private resampler:         SpeexResampler                            | null = null
+    private closing                                                             = false
+    private connectionTimeout: ReturnType<typeof setTimeout>             | null = null
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -150,6 +150,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
         })
         this.ws.on("error", (err) => {
             this.log("error", `WebSocket connection error: ${err}`)
+            if (!this.closing && this.queue !== null)
+                this.queue.write(null)
+            this.emit("error", err)
         })
         /*  track speech timing by item_id (OpenAI provides timestamps via VAD events)  */
@@ -164,6 +167,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
             }, new Map<string, any>())
         }
+        /*  track transcription text  */
         let text = ""
         this.ws.on("message", (data) => {
             let ev: any
@@ -353,7 +357,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
             this.ws.close()
             this.ws = null
         }
-        this.openai = null
+        if (this.openai !== null)
+            this.openai = null
         /*  close resampler  */
         this.resampler = null

package/speechflow-cli/src/speechflow-node-t2a-amazon.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import Stream from "node:stream"
 /*  external dependencies  */
 import { getStreamAsBuffer } from "get-stream"
+import { Duration }          from "luxon"
 import SpeexResampler        from "speex-resampler"
 import {
     PollyClient, SynthesizeSpeechCommand,
@@ -25,9 +26,9 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
     public static name = "t2a-amazon"
     /*  internal state  */
-    private client: PollyClient | null = null
-    private closing = false
+    private client:    PollyClient    | null = null
     private resampler: SpeexResampler | null = null
+    private closing                          = false
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -129,22 +130,43 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
                 }
                 if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
-                else if (chunk.payload.length > 0) {
+                else if (chunk.payload === "")
+                    callback()
+                else {
+                    let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
+                        processTimeout = null
+                        callback(new Error("AWS Polly API timeout"))
+                    }, 60 * 1000)
+                    const clearProcessTimeout = () => {
+                        if (processTimeout !== null) {
+                            clearTimeout(processTimeout)
+                            processTimeout = null
+                        }
+                    }
                     self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
                     textToSpeech(chunk.payload as string).then((buffer) => {
-                        if (self.closing)
-                            throw new Error("stream destroyed during processing")
+                        if (self.closing) {
+                            clearProcessTimeout()
+                            callback(new Error("stream destroyed during processing"))
+                            return
+                        }
+                        /*  calculate actual audio duration from PCM buffer size  */
+                        const durationMs = util.audioBufferDuration(buffer,
+                            self.config.audioSampleRate, self.config.audioBitDepth) * 1000
+                        /*  create new chunk with recalculated timestamps  */
                         const chunkNew = chunk.clone()
-                        chunkNew.type = "audio"
-                        chunkNew.payload = buffer
+                        chunkNew.type         = "audio"
+                        chunkNew.payload      = buffer
+                        chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
+                        clearProcessTimeout()
                         this.push(chunkNew)
                         callback()
                     }).catch((error: unknown) => {
-                        callback(util.ensureError(error, "failed to send to AWS Polly"))
+                        clearProcessTimeout()
+                        callback(util.ensureError(error, "AWS Polly processing failed"))
                     })
                 }
-                else
-                    callback()
             },
             final (callback) {
                 callback()

package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts CHANGED Viewed

@@ -24,8 +24,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
     /*  internal state  */
     private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
-    private closing = false
-    private resampler: SpeexResampler | null = null
+    private resampler:  SpeexResampler              | null = null
+    private closing                                        = false
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -131,8 +131,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
             })
         }
-        /*  establish resampler from ElevenLabs's maximum 24Khz
-            output to our standard audio sample rate (48KHz)  */
+        /*  establish resampler from ElevenLabs's tier-dependent
+            output sample rate to our standard audio sample rate (48KHz)  */
         this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
         /*  create transform stream and connect it to the ElevenLabs API  */
@@ -147,6 +147,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
                     callback(new Error("stream already destroyed"))
                 else if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload === "")
+                    callback()
                 else {
                     let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
                         processTimeout = null

package/speechflow-cli/src/speechflow-node-t2a-google.ts CHANGED Viewed

@@ -126,11 +126,8 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
                     callback(new Error("stream already destroyed"))
                 else if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
-                else if (chunk.payload === "") {
-                    /*  pass through empty chunks  */
-                    this.push(chunk)
+                else if (chunk.payload === "")
                     callback()
-                }
                 else {
                     let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
                         processTimeout = null

package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import Stream from "node:stream"
 /*  external dependencies  */
 import { KokoroTTS }  from "kokoro-js"
+import { Duration }   from "luxon"
 import SpeexResampler from "speex-resampler"
 /*  internal dependencies  */
@@ -21,9 +22,9 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
     public static name = "t2a-kokoro"
     /*  internal state  */
-    private kokoro: KokoroTTS | null = null
-    private closing = false
+    private kokoro:    KokoroTTS      | null = null
     private resampler: SpeexResampler | null = null
+    private closing                          = false
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -122,9 +123,7 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
             }
             /*  resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz  */
-            const buffer2 = this.resampler!.processChunk(buffer1)
-            return buffer2
+            return this.resampler!.processChunk(buffer1)
         }
         /*  create transform stream and connect it to the Kokoro API  */
@@ -139,18 +138,42 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
                     callback(new Error("stream already destroyed"))
                 else if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload === "")
+                    callback()
                 else {
+                    let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
+                        processTimeout = null
+                        callback(new Error("Kokoro TTS timeout"))
+                    }, 60 * 1000)
+                    const clearProcessTimeout = () => {
+                        if (processTimeout !== null) {
+                            clearTimeout(processTimeout)
+                            processTimeout = null
+                        }
+                    }
                     text2speech(chunk.payload).then((buffer) => {
-                        if (self.closing)
-                            throw new Error("stream destroyed during processing")
+                        if (self.closing) {
+                            clearProcessTimeout()
+                            callback(new Error("stream destroyed during processing"))
+                            return
+                        }
                         self.log("info", `Kokoro: received audio (buffer length: ${buffer.byteLength})`)
+                        /*  calculate actual audio duration from PCM buffer size  */
+                        const durationMs = util.audioBufferDuration(buffer,
+                            self.config.audioSampleRate, self.config.audioBitDepth) * 1000
+                        /*  create new chunk with recalculated timestamps  */
                         const chunkNew = chunk.clone()
-                        chunkNew.type = "audio"
-                        chunkNew.payload = buffer
+                        chunkNew.type         = "audio"
+                        chunkNew.payload      = buffer
+                        chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
+                        clearProcessTimeout()
                         this.push(chunkNew)
                         callback()
                     }).catch((error: unknown) => {
-                        callback(util.ensureError(error))
+                        clearProcessTimeout()
+                        callback(util.ensureError(error, "Kokoro processing failed"))
                     })
                 }
             },

package/speechflow-cli/src/speechflow-node-t2a-openai.ts CHANGED Viewed

@@ -103,11 +103,8 @@ export default class SpeechFlowNodeT2AOpenAI extends SpeechFlowNode {
                     callback(new Error("stream already destroyed"))
                 else if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
-                else if (chunk.payload === "") {
-                    /*  pass through empty chunks  */
-                    this.push(chunk)
+                else if (chunk.payload === "")
                     callback()
-                }
                 else {
                     let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
                         processTimeout = null