npm - speechflow - Versions diffs - 2.3.0 → 2.3.1 - Mend

speechflow 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/speechflow-cli/src/speechflow-node-t2t-sentence.ts CHANGED Viewed

@@ -35,6 +35,66 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
     private queueRecv  = this.queue.pointerUse("recv")
     private closing    = false
     private workingOffTimer: ReturnType<typeof setTimeout> | null = null
+    private lastChunkTime = 0
+    /*  known abbreviations from English and German (lowercased),
+        which should NOT be treated as sentence boundaries  */
+    private static abbreviations = new Set([
+        "prof", "dr", "mr", "mrs", "ms", "jr", "sr", "st",
+        "vs", "etc", "ca", "bzw", "bspw", "usw", "sog", "ggf", "evtl"
+    ])
+    /*  find the first valid sentence boundary in text: returns the sentence (incl. closing
+        quotes/brackets) and the non-blank rest ("" at end of text), or null if none is found  */
+    private static findSentenceBoundary (text: string): { sentence: string, rest: string } | null {
+        for (let i = 0; i < text.length; i++) {
+            /*  match sentence-ending punctuation (including ellipsis "..." and "…")  */
+            const pm = /^(\.\.\.|\u2026|\.|\?|!)/.exec(text.slice(i, i + 3))
+            if (!pm)
+                continue
+            const firstPunctPos = i
+            i += pm[1].length - 1
+            /*  extract the word preceding the punctuation mark (empty if there is none)  */
+            let j = firstPunctPos - 1
+            while (j >= 0) {
+                /*  handle surrogate pairs (for characters outside the BMP)  */
+                if (j > 0 && /[\uDC00-\uDFFF]/.test(text[j])) {
+                    if (!/^\p{L}$/u.test(text[j - 1] + text[j]))
+                        break
+                    j -= 2
+                }
+                else {
+                    if (!/^\p{L}$/u.test(text[j]))
+                        break
+                    j--
+                }
+            }
+            const precedingWord = text.substring(j + 1, firstPunctPos)
+            /*  skip abbreviations (only relevant for periods)  */
+            if (pm[1] === ".") {
+                /*  skip single-letter abbreviations (handles "U.S.", "e.g.", "i.e.", etc.)  */
+                if (precedingWord.length === 1 && /^\p{L}$/u.test(precedingWord))
+                    continue
+                /*  skip known multi-letter abbreviations (case-insensitive matching)  */
+                if (SpeechFlowNodeT2TSentence.abbreviations.has(precedingWord.toLowerCase()))
+                    continue
+            }
+            /*  split after optional closing quotes/parentheses/brackets (rest has to start non-blank)  */
+            const after = text.substring(i + 1)
+            const m = after.match(/^(["\u201D\u2019)\]]*)\s+(\S[\s\S]*)$/)
+            if (m !== null)
+                return { sentence: text.substring(0, i + 1 + m[1].length), rest: m[2] }
+            /*  found a punctuation at end of text (possibly with trailing closing chars and any whitespace)  */
+            if (/^["\u201D\u2019)\]]*\s*$/.test(after))
+                return { sentence: text.substring(0, i + 1) + after.replace(/\s+$/, ""), rest: "" }
+        }
+        return null
+    }
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -53,7 +113,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
     /*  concatenate two payloads with proper whitespacing  */
     private concatPayload (s1: string, s2: string) {
-        if (!(s1.match(/\s+$/) || s2.match(/^\s+/)))
+        if (!(/\s+$/.test(s1) || /^\s+/.test(s2)))
             return `${s1} ${s2}`
         else
             return `${s1}${s2}`
@@ -64,24 +124,12 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
         /*  clear destruction flag  */
         this.closing = false
-        /*  work off queued text frames  */
-        let workingOff = false
-        const workOffQueue = async () => {
-            if (this.closing)
-                return
-            /*  control working off round  */
-            if (workingOff)
-                return
-            workingOff = true
-            if (this.workingOffTimer !== null) {
-                clearTimeout(this.workingOffTimer)
-                this.workingOffTimer = null
-            }
-            this.queue.off("write", workOffQueue)
-            /*  try to work off one or more chunks  */
-            while (!this.closing) {
+        /*  work off queued text frames (inner processing)  */
+        const workOffQueueInner = (): boolean => {
+            const maxIterations = 50
+            let iterations = 0
+            while (!this.closing && iterations < maxIterations) {
+                iterations++
                 const element = this.queueSplit.peek()
                 if (element === undefined)
                     break
@@ -91,46 +139,49 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                 }
                 /*  skip elements already completed  */
-                if (element.type === "text-frame" && element.chunk.kind === "final" && element.complete === true) {
+                if (element.type === "text-frame"
+                    && element.chunk.kind === "final"
+                    && element.complete === true) {
                     this.queueSplit.walk(+1)
                     continue
                 }
                 /*  perform sentence splitting on input chunk  */
                 if (element.chunk.kind === "final") {
+                    element.chunk = element.chunk.clone()
                     const chunk = element.chunk
                     const payload = chunk.payload as string
-                    const m = payload.match(/^((?:.|\r?\n)+?[.;?!])(?:\s+((?:.|\r?\n)+)|\s*)$/)
-                    if (m !== null) {
+                    const boundary = SpeechFlowNodeT2TSentence.findSentenceBoundary(payload)
+                    if (boundary !== null) {
                         /*  contains a sentence  */
-                        const [ , sentence, rest ] = m
-                        if (rest !== undefined && rest !== "") {
+                        const { sentence, rest } = boundary
+                        if (rest !== "") {
                             /*  contains more than a sentence  */
                             const chunk2 = chunk.clone()
                             const duration = Duration.fromMillis(
                                 chunk.timestampEnd.minus(chunk.timestampStart).toMillis() *
-                                (sentence.length / payload.length))
+                                (sentence.length / Math.max(payload.length, 1)))
                             chunk2.timestampStart = chunk.timestampStart.plus(duration)
                             chunk.timestampEnd    = chunk2.timestampStart
                             chunk.payload  = sentence
                             chunk2.payload = rest
                             element.complete = true
-                            this.queue.silent(true)
-                            this.queueSplit.touch()
-                            this.queue.silent(false)
+                            this.queue.silently(() => { this.queueSplit.touch() })
                             this.queueSplit.walk(+1)
                             this.queueSplit.insert({ type: "text-frame", chunk: chunk2, complete: false })
                         }
                         else {
                             /*  contains just the sentence  */
                             element.complete = true
-                            this.queue.silent(true)
-                            this.queueSplit.silent(true)
-                            const position = this.queueSplit.position()
-                            this.queueSplit.walk(+1)
-                            this.queue.silent(false)
-                            this.queueSplit.silent(false)
-                            this.queueSplit.touch(position)
+                            const position = this.queue.silently(() =>
+                                this.queueSplit.silently(() => {
+                                    const pos = this.queueSplit.position()
+                                    this.queueSplit.walk(+1)
+                                    return pos
+                                })
+                            )
+                            if (position < this.queue.elements.length)
+                                this.queueSplit.touch(position)
                         }
                     }
                     else {
@@ -151,21 +202,52 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                             }
                             if (element2.chunk.kind === "final") {
                                 /*  merge into following chunk  */
+                                element2.chunk = element2.chunk.clone()
                                 element2.chunk.timestampStart = element.chunk.timestampStart
                                 element2.chunk.payload = this.concatPayload(element.chunk.payload as string,
                                     element2.chunk.payload as string)
                                 /*  remove current element and touch now current element  */
-                                this.queue.silent(true)
-                                this.queueSplit.delete()
-                                this.queue.silent(false)
+                                this.queue.silently(() => { this.queueSplit.delete() })
                                 this.queueSplit.touch()
                             }
-                            else
-                                break
+                            else {
+                                /*  following chunk is intermediate (speculative):
+                                    check timeout to flush incomplete sentence fragment  */
+                                if (this.lastChunkTime > 0
+                                    && (Date.now() - this.lastChunkTime) >= (this.params.timeout as number)) {
+                                    element.complete = true
+                                    const position2 = this.queue.silently(() =>
+                                        this.queueSplit.silently(() => {
+                                            const pos = this.queueSplit.position()
+                                            this.queueSplit.walk(+1)
+                                            return pos
+                                        })
+                                    )
+                                    if (position2 < this.queue.elements.length)
+                                        this.queueSplit.touch(position2)
+                                }
+                                else
+                                    break
+                            }
+                        }
+                        else if (this.lastChunkTime > 0
+                            && (Date.now() - this.lastChunkTime) >= (this.params.timeout as number)) {
+                            /*  no following chunk yet, but timeout expired:
+                                flush incomplete sentence fragment  */
+                            element.complete = true
+                            const position = this.queue.silently(() =>
+                                this.queueSplit.silently(() => {
+                                    const pos = this.queueSplit.position()
+                                    this.queueSplit.walk(+1)
+                                    return pos
+                                })
+                            )
+                            if (position < this.queue.elements.length)
+                                this.queueSplit.touch(position)
                         }
                         else {
-                            /*  no following chunk yet  */
+                            /*  no following chunk yet, still within timeout  */
                             break
                         }
                     }
@@ -173,18 +255,48 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                 else
                     break
             }
+            return (!this.closing && iterations >= maxIterations)
+        }
+        /*  work off queued text frames (outer processing)  */
+        let workingOff = false
+        const workOffQueue = async () => {
+            if (this.closing)
+                return
+            /*  control working off round  */
+            if (workingOff)
+                return
+            workingOff = true
+            if (this.workingOffTimer !== null) {
+                clearTimeout(this.workingOffTimer)
+                this.workingOffTimer = null
+            }
+            this.queue.off("write", workOffQueue)
-            /*  re-initiate working off round (if still not destroyed)  */
-            if (!this.closing) {
-                this.workingOffTimer = setTimeout(workOffQueue, 100)
-                this.queue.once("write", workOffQueue)
+            /*  try to work off one or more chunks  */
+            let hasMore = false
+            try {
+                hasMore = workOffQueueInner()
+            }
+            catch (error) {
+                this.log("error", `sentence splitting error: ${error}`)
+            }
+            finally {
+                /*  re-initiate working off round (if still not destroyed)  */
+                workingOff = false
+                if (!this.closing) {
+                    this.workingOffTimer = setTimeout(workOffQueue, hasMore ? 0 : 100)
+                    this.queue.once("write", workOffQueue)
+                }
             }
-            workingOff = false
         }
         this.queue.once("write", workOffQueue)
         /*  provide Duplex stream and internally attach to classifier  */
-        let previewed = false
+        let previewedPayload = ""
+        let flushListenerRegistered = false
+        let eofPushed = false
         const self = this
         this.stream = new Stream.Duplex({
             writableObjectMode: true,
@@ -217,8 +329,9 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                             }
                         }
                     }
-                    previewed = false
+                    previewedPayload = ""
                     self.queueRecv.append({ type: "text-frame", chunk, complete: false })
+                    self.lastChunkTime = Date.now()
                     callback()
                 }
             },
@@ -229,6 +342,20 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                     callback()
                     return
                 }
+                /*  promote any trailing intermediate chunk to final
+                    (no replacement will ever arrive, so treat it as final)  */
+                const recvPos = self.queueRecv.position()
+                if (recvPos > 0) {
+                    const element = self.queueRecv.peek(recvPos - 1)
+                    if (element
+                        && element.type === "text-frame"
+                        && element.chunk.kind === "intermediate") {
+                        element.chunk = element.chunk.clone()
+                        element.chunk.kind = "final"
+                    }
+                }
                 /*  signal end of file  */
                 self.queueRecv.append({ type: "text-eof" })
                 callback()
@@ -236,17 +363,27 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
             /*  send text chunk(s) (readable side of stream)  */
             read (_size) {
+                /*  idempotently push EOF to readable side  */
+                const pushNull = () => {
+                    if (eofPushed)
+                        return
+                    eofPushed = true
+                    this.push(null)
+                }
                 /*  flush pending text chunks  */
                 const flushPendingChunks = () => {
+                    flushListenerRegistered = false
                     if (self.closing) {
-                        this.push(null)
+                        pushNull()
                         return
                     }
                     const element = self.queueSend.peek()
                     if (element !== undefined
                         && element.type === "text-eof") {
-                        this.push(null)
+                        pushNull()
                         self.queueSend.walk(+1)
+                        self.queue.trim()
                     }
                     else if (element !== undefined
                         && element.type === "text-frame"
@@ -258,7 +395,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                             if (nextElement === undefined)
                                 break
                             else if (nextElement.type === "text-eof") {
-                                this.push(null)
+                                pushNull()
                                 self.queueSend.walk(+1)
                                 eofSeen = true
                                 break
@@ -266,20 +403,22 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                             else if (nextElement.type === "text-frame"
                                 && nextElement.complete !== true)
                                 break
-                            self.log("info", `send text 1 (${nextElement.chunk.kind}): ${JSON.stringify(nextElement.chunk.payload)} pos=${self.queueSend.position()}`)
+                            self.log("info", `send text/complete (${nextElement.chunk.kind}): ${JSON.stringify(nextElement.chunk.payload)} pos=${self.queueSend.position()}`)
                             this.push(nextElement.chunk)
                             self.queueSend.walk(+1)
-                            self.queue.trim()
                         }
+                        previewedPayload = ""
+                        self.queue.trim()
                         /*  wait for more data (unless end-of-stream was reached)  */
-                        if (!eofSeen && !self.closing)
+                        if (!eofSeen && !self.closing && !flushListenerRegistered) {
+                            flushListenerRegistered = true
                             self.queue.once("write", flushPendingChunks)
+                        }
                     }
                     else if (element !== undefined
                         && element.type === "text-frame"
                         && element.complete === false
-                        && !previewed
                         && self.params.interim === true) {
                         /*  merge together all still queued elements and
                             send this out as an intermediate chunk as preview  */
@@ -293,17 +432,30 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                                 break
                             previewChunk.payload = self.concatPayload(
                                 previewChunk.payload as string, element2.chunk.payload as string)
+                            previewChunk.timestampEnd = element2.chunk.timestampEnd
+                        }
+                        /*  send preview only if payload actually changed  */
+                        if ((previewChunk.payload as string) !== previewedPayload) {
+                            this.push(previewChunk)
+                            self.log("info", `send text/preview (intermediate): ${JSON.stringify(previewChunk.payload)}`)
+                            previewedPayload = previewChunk.payload as string
                         }
-                        this.push(previewChunk)
-                        self.log("info", `send text 2 (intermediate): ${JSON.stringify(previewChunk.payload)}`)
-                        previewed = true
                         /*  wait for more data  */
-                        if (!self.closing)
+                        if (!self.closing && !flushListenerRegistered) {
+                            flushListenerRegistered = true
                             self.queue.once("write", flushPendingChunks)
+                        }
                     }
-                    else if (!self.closing)
+                    else if (!self.closing && !flushListenerRegistered) {
+                        flushListenerRegistered = true
                         self.queue.once("write", flushPendingChunks)
+                    }
+                }
+                if (flushListenerRegistered) {
+                    self.queue.removeListener("write", flushPendingChunks)
+                    flushListenerRegistered = false
                 }
                 flushPendingChunks()
             }
@@ -321,8 +473,9 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
             this.workingOffTimer = null
         }
-        /*  remove any pending event listeners  */
+        /*  remove any pending event listeners and clear queue  */
         this.queue.removeAllListeners("write")
+        this.queue.clear()
         /*  shutdown stream  */
         if (this.stream !== null) {

package/speechflow-cli/src/speechflow-node-xio-exec.ts CHANGED Viewed

@@ -96,7 +96,7 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
         this.subprocess.on("error", (err) => {
             this.log("error", `subprocess error: ${err.message}`)
             this.emit("error", err)
-            if (this.stream !== null)
+            if (this.stream !== null && !this.stream.destroyed)
                 this.stream.emit("error", err)
         })
@@ -172,6 +172,10 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
                 })
             }
+            /*  remove event listeners to prevent errors during kill sequence  */
+            this.subprocess.removeAllListeners("error")
+            this.subprocess.removeAllListeners("exit")
             /*  wait for subprocess to exit gracefully  */
             const ac2 = new AbortController()
             await Promise.race([
@@ -208,10 +212,6 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
                 this.log("error", "subprocess did not terminate even after SIGKILL")
             })
-            /*  remove event listeners to prevent memory leaks  */
-            this.subprocess.removeAllListeners("error")
-            this.subprocess.removeAllListeners("exit")
             /*  clear subprocess reference  */
             this.subprocess = null
         }

package/speechflow-cli/src/speechflow-node-xio-webrtc.ts CHANGED Viewed

@@ -46,6 +46,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
     private rtpSequence                                           = 0
     private rtpTimestamp                                          = 0
     private rtpSSRC                                               = 0
+    private rtpMarkerNext                                         = true
     private maxConnections                                        = 10
     /*  Opus codec configuration: 48kHz, mono, 16-bit  */
@@ -177,7 +178,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
             padding:        false,
             paddingSize:    0,
             extension:      false,
-            marker:         true,
+            marker:         this.rtpMarkerNext,
             payloadType:    111, /*  Opus payload type  */
             sequenceNumber: this.rtpSequence++ & 0xFFFF,
             timestamp:      this.rtpTimestamp,
@@ -186,6 +187,9 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
             extensions:     []
         })
+        /*  clear marker (set only on first packet of a talkspurt per RFC 3551)  */
+        this.rtpMarkerNext = false
         /*  build RTP packet  */
         const rtpPacket = new RtpPacket(rtpHeader, opusPacket)
@@ -365,6 +369,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
         this.rtpSequence  = Math.floor(Math.random() * 0x10000)
         this.rtpTimestamp = Math.floor(Math.random() * 0x100000000) >>> 0
         this.rtpSSRC      = Math.floor(Math.random() * 0x100000000) >>> 0
+        this.rtpMarkerNext = true
         /*  setup chunk queue for incoming audio  */
         this.chunkQueue = new util.AsyncQueue<SpeechFlowChunk>()
@@ -482,6 +487,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
                 }
                 if (self.peerConnections.size === 0) {
                     /*  silently drop if no viewers connected  */
+                    self.rtpMarkerNext = true
                     callback()
                     return
                 }

package/speechflow-cli/src/speechflow-node-xio-websocket.ts CHANGED Viewed

@@ -102,8 +102,13 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
                         buffer = Buffer.from(data)
                     else
                         buffer = Buffer.concat(data)
-                    const chunk = util.streamChunkDecode(buffer)
-                    this.chunkQueue?.write(chunk)
+                    try {
+                        const chunk = util.streamChunkDecode(buffer)
+                        this.chunkQueue?.write(chunk)
+                    }
+                    catch (_err: unknown) {
+                        this.log("warning", `received invalid CBOR chunk on URL ${this.params.listen} from peer ${peer}`)
+                    }
                 })
             })
             this.server.on("error", (error) => {
@@ -219,8 +224,13 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
                     return
                 }
                 const buffer = Buffer.from(ev.data)
-                const chunk = util.streamChunkDecode(buffer)
-                this.chunkQueue?.write(chunk)
+                try {
+                    const chunk = util.streamChunkDecode(buffer)
+                    this.chunkQueue?.write(chunk)
+                }
+                catch (_err: unknown) {
+                    this.log("warning", `received invalid CBOR chunk from URL ${this.params.connect}`)
+                }
             })
             this.client.binaryType = "arraybuffer"
             const self = this

package/speechflow-cli/src/speechflow-util-audio.ts CHANGED Viewed

@@ -262,16 +262,9 @@ export class WebAudio {
                 for (let i = 0; i < int16Array.length; i++)
                     float32Data[i] = int16Array[i] / 32768.0
-                /*  start capture first  */
-                if (this.captureNode !== null) {
-                    this.captureNode.port.postMessage({
-                        type: "start-capture",
-                        chunkId,
-                        expectedSamples: int16Array.length
-                    })
-                }
-                /*  wait for capture-ready acknowledgment before sending data  */
+                /*  register capture-ready handler first (before posting start-capture,
+                    to avoid a race where capture-ready arrives before the listener
+                    is in place)  */
                 const readyHandler = (event: MessageEvent) => {
                     const { type: msgType, chunkId: msgChunkId } = event.data ?? {}
                     if (msgType === "capture-ready" && msgChunkId === chunkId) {
@@ -287,6 +280,15 @@ export class WebAudio {
                 }
                 if (this.captureNode !== null)
                     this.captureNode.port.addEventListener("message", readyHandler)
+                /*  start capture after handler is registered  */
+                if (this.captureNode !== null) {
+                    this.captureNode.port.postMessage({
+                        type: "start-capture",
+                        chunkId,
+                        expectedSamples: int16Array.length
+                    })
+                }
             }
             catch (error) {
                 clearTimeout(timeout)

package/speechflow-cli/src/speechflow-util-llm.ts CHANGED Viewed

@@ -7,6 +7,9 @@
 /*  standard dependencies  */
 import EventEmitter                  from "node:events"
+/*  internal dependencies  */
+import * as util                     from "./speechflow-util-misc"
 /*  external dependencies  */
 import OpenAI                        from "openai"
 import Anthropic                     from "@anthropic-ai/sdk"
@@ -353,8 +356,16 @@ export class LLM extends EventEmitter {
             this.ollama?.abort()
             this.ollama = null
         }
-        else if (this.config.provider === "transformers") {
-            this.transformer?.dispose()
+        else if (this.config.provider === "transformers" && this.transformer !== null) {
+            const ac = new AbortController()
+            await Promise.race([
+                this.transformer.dispose(),
+                util.timeout(5000, "transformer dispose timeout", ac.signal)
+            ]).finally(() => {
+                ac.abort()
+            }).catch((error) => {
+                this.log("warning", `error during transformer cleanup: ${error}`)
+            })
             this.transformer = null
         }
         this.initialized = false