npm - speechflow - Versions diffs - 1.6.7 → 1.7.0 - Mend

speechflow 1.6.7 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (126)

package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts CHANGED

@@ -20,10 +20,16 @@ import HAPIWebSocket from "hapi-plugin-websocket"
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 import * as util                           from "./speechflow-util"
+/*  internal helper types  */
 type WSPeerInfo = {
-    ctx:  Record<string, any>
-    ws:   WebSocket
-    req:  http.IncomingMessage
+    ctx:   Record<string, any>
+    ws:    WebSocket
+    req:   http.IncomingMessage
+}
+type TextChunk = {
+    start: Duration
+    end:   Duration
+    text:  string
 }
 /*  SpeechFlow node for subtitle (text-to-text) "translations"  */
@@ -43,14 +49,14 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
         this.configure({
             format: { type: "string",  pos: 0, val: "srt",    match: /^(?:srt|vtt)$/ },
             words:  { type: "boolean",         val: false },
-            mode:   { type: "string",          val: "export", match: /^(?:export|render)$/ },
+            mode:   { type: "string",          val: "export", match: /^(?:export|import|render)$/ },
             addr:   { type: "string",          val: "127.0.0.1" },
             port:   { type: "number",          val: 8585 }
         })
         /*  declare node input/output format  */
         this.input  = "text"
-        this.output = this.params.mode === "export" ? "text" : "none"
+        this.output = (this.params.mode === "export" || this.params.mode === "import") ? "text" : "none"
     }
     /*  open node  */
@@ -95,11 +101,18 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
                     }
                     return text
                 }
-                let output = ""
+                /*  determine start and end timestamp,
+                    by using first word's start time and last word's end time (if available),
+                    to exclude leading and trailing silence parts  */
+                const words: { word: string, start: Duration, end: Duration }[] = chunk.meta.get("words") ?? []
+                const timestampStart = words.length > 0 ? words[0].start              : chunk.timestampStart
+                const timestampEnd   = words.length > 0 ? words[words.length - 1].end : chunk.timestampEnd
+                /*  produce SRT/VTT blocks  */
+                let output = convertSingle(timestampStart, timestampEnd, chunk.payload)
                 if (this.params.words) {
-                    output += convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
-                    const words = (chunk.meta.get("words") ?? []) as
-                        { word: string, start: Duration, end: Duration }[]
+                    /*  produce additional SRT/VTT blocks with each word highlighted  */
                     const occurrences = new Map<string, number>()
                     for (const word of words) {
                         let occurrence = occurrences.get(word.word) ?? 0
@@ -108,49 +121,210 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
                         output += convertSingle(word.start, word.end, chunk.payload, word.word, occurrence)
                     }
                 }
-                else
-                    output += convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
                 return output
             }
             /*  establish a duplex stream  */
             const self = this
-            let firstChunk = true
+            let headerEmitted = false
             this.stream = new Stream.Transform({
                 readableObjectMode: true,
                 writableObjectMode: true,
                 decodeStrings:      false,
                 highWaterMark:      1,
                 transform (chunk: SpeechFlowChunk, encoding, callback) {
-                    if (firstChunk && self.params.format === "vtt") {
+                    if (!headerEmitted && self.params.format === "vtt") {
                         this.push(new SpeechFlowChunk(
                             Duration.fromMillis(0), Duration.fromMillis(0),
                             "final", "text",
                             "WEBVTT\n\n"
                         ))
-                        firstChunk = false
+                        headerEmitted = true
                     }
                     if (Buffer.isBuffer(chunk.payload))
                         callback(new Error("invalid chunk payload type"))
+                    else if (chunk.payload === "") {
+                        this.push(chunk)
+                        callback()
+                    }
                     else {
-                        if (chunk.payload === "") {
-                            this.push(chunk)
+                        convert(chunk).then((payload) => {
+                            const chunkNew = chunk.clone()
+                            chunkNew.payload = payload
+                            this.push(chunkNew)
                             callback()
+                        }).catch((error: unknown) => {
+                            callback(util.ensureError(error))
+                        })
+                    }
+                },
+                final (callback) {
+                    callback()
+                }
+            })
+        }
+        else if (this.params.mode === "import") {
+            /*  parse timestamp in SRT format ("HH:MM:SS,mmm") or VTT format ("HH:MM:SS.mmm")  */
+            const parseTimestamp = (ts: string): Duration => {
+                const match = ts.match(/^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/)
+                if (!match)
+                    throw new Error(`invalid timestamp format: "${ts}"`)
+                const hours        = Number.parseInt(match[1], 10)
+                const minutes      = Number.parseInt(match[2], 10)
+                const seconds      = Number.parseInt(match[3], 10)
+                const milliseconds = Number.parseInt(match[4], 10)
+                if (minutes > 59 || seconds > 59)
+                    throw new Error(`invalid timestamp value "${ts}"`)
+                return Duration.fromObject({ hours, minutes, seconds, milliseconds })
+            }
+            /*  strip arbitrary HTML tags  */
+            const stripHtmlTags = (text: string): string =>
+                text.replace(/<\/?[a-zA-Z][^>]*>/g, "")
+            /*  parse SRT format  */
+            const parseSRT = (input: string): TextChunk[] => {
+                const results: TextChunk[] = []
+                /*  iterate over all blocks  */
+                const blocks = input.trim().split(/\r?\n\r?\n+/)
+                for (const block of blocks) {
+                    const lines = block.trim().split(/\r?\n/)
+                    if (lines.length < 2) {
+                        this.log("warning", "SRT block contains less than 2 lines")
+                        continue
+                    }
+                    /*  skip optional sequence number line (first line)  */
+                    let lineIdx = 0
+                    if (/^\d+$/.test(lines[0].trim()))
+                        lineIdx = 1
+                    /*  parse timestamp line  */
+                    const timeLine  = lines[lineIdx]
+                    const timeMatch = timeLine.match(/^(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})/)
+                    if (!timeMatch) {
+                        this.log("warning", "SRT contains invalid timestamp line")
+                        continue
+                    }
+                    const start = parseTimestamp(timeMatch[1])
+                    const end   = parseTimestamp(timeMatch[2])
+                    /*  collect text lines  */
+                    const textLines = lines.slice(lineIdx + 1).join("\n")
+                    const text = stripHtmlTags(textLines).trim()
+                    if (text !== "")
+                        results.push({ start, end, text })
+                }
+                return results
+            }
+            /*  parse VTT format  */
+            const parseVTT = (input: string): TextChunk[] => {
+                const results: TextChunk[] = []
+                /*  remove VTT header and any metadata  */
+                const content = input.trim().replace(/^WEBVTT[^\r\n]*\r?\n*/, "")
+                /*  iterate over all blocks  */
+                const blocks = content.trim().split(/\r?\n\r?\n+/)
+                for (const block of blocks) {
+                    const lines = block.trim().split(/\r?\n/)
+                    if (lines.length < 1) {
+                        this.log("warning", "VTT block contains less than 1 line")
+                        continue
+                    }
+                    /*  skip optional cue identifier lines  */
+                    let lineIdx = 0
+                    while (lineIdx < lines.length && !lines[lineIdx].includes("-->"))
+                        lineIdx++
+                    if (lineIdx >= lines.length)
+                        continue
+                    /*  parse timestamp line  */
+                    const timeLine  = lines[lineIdx]
+                    const timeMatch = timeLine.match(/^(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})/)
+                    if (!timeMatch) {
+                        this.log("warning", "VTT contains invalid timestamp line")
+                        continue
+                    }
+                    const start = parseTimestamp(timeMatch[1])
+                    const end   = parseTimestamp(timeMatch[2])
+                    /*  collect text lines  */
+                    const textLines = lines.slice(lineIdx + 1).join("\n")
+                    const text = stripHtmlTags(textLines).trim()
+                    if (text !== "")
+                        results.push({ start, end, text })
+                }
+                return results
+            }
+            /*  buffer for accumulating input  */
+            let buffer = ""
+            /*  establish a duplex stream  */
+            const self = this
+            this.stream = new Stream.Transform({
+                readableObjectMode: true,
+                writableObjectMode: true,
+                decodeStrings:      false,
+                highWaterMark:      1,
+                transform (chunk: SpeechFlowChunk, encoding, callback) {
+                    /*  sanity check text chunks  */
+                    if (Buffer.isBuffer(chunk.payload)) {
+                        callback(new Error("invalid chunk payload type"))
+                        return
+                    }
+                    /*  short-circuit processing in case of empty payloads  */
+                    if (chunk.payload === "") {
+                        this.push(chunk)
+                        callback()
+                        return
+                    }
+                    /*  accumulate input  */
+                    buffer += chunk.payload
+                    /*  parse accumulated input  */
+                    try {
+                        /*  parse entries  */
+                        const entries = (self.params.format === "srt" ? parseSRT(buffer) : parseVTT(buffer))
+                        /*  emit parsed entries as individual chunks  */
+                        for (const entry of entries) {
+                            const chunkNew = new SpeechFlowChunk(entry.start, entry.end, "final", "text", entry.text)
+                            this.push(chunkNew)
                         }
-                        else {
-                            convert(chunk).then((payload) => {
-                                const chunkNew = chunk.clone()
-                                chunkNew.payload = payload
-                                this.push(chunkNew)
-                                callback()
-                            }).catch((error: unknown) => {
-                                callback(util.ensureError(error))
-                            })
-                        }
+                        /*  clear buffer after successful parse  */
+                        buffer = ""
+                        callback()
+                    }
+                    catch (error: unknown) {
+                        buffer = ""
+                        callback(util.ensureError(error))
                     }
                 },
                 final (callback) {
-                    this.push(null)
+                    /*  process any remaining buffer content  */
+                    if (buffer.trim() !== "") {
+                        try {
+                            /*  parse entries  */
+                            const entries = self.params.format === "srt" ? parseSRT(buffer) : parseVTT(buffer)
+                            /*  emit parsed entries as individual chunks  */
+                            for (const entry of entries) {
+                                const chunkNew = new SpeechFlowChunk(entry.start, entry.end, "final", "text", entry.text)
+                                this.push(chunkNew)
+                            }
+                        }
+                        catch (_error: unknown) {
+                            /*  ignore parse errors on final flush  */
+                        }
+                    }
                     callback()
                 }
             })
@@ -239,13 +413,11 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
                 write (chunk: SpeechFlowChunk, encoding, callback) {
                     if (Buffer.isBuffer(chunk.payload))
                         callback(new Error("invalid chunk payload type"))
+                    else if (chunk.payload === "")
+                        callback()
                     else {
-                        if (chunk.payload === "")
-                            callback()
-                        else {
-                            emit(chunk)
-                            callback()
-                        }
+                        emit(chunk)
+                        callback()
                     }
                 },
                 final (callback) {

package/speechflow-cli/src/speechflow-node-x2x-filter.ts CHANGED

@@ -117,7 +117,6 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
                 callback()
             },
             final (callback) {
-                this.push(null)
                 callback()
             }
         })

package/speechflow-cli/src/speechflow-node-x2x-trace.ts CHANGED

@@ -119,11 +119,6 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
                 }
             },
             final (callback) {
-                if (self.closing || self.params.mode === "sink") {
-                    callback()
-                    return
-                }
-                this.push(null)
                 callback()
             }
         })

package/speechflow-cli/src/speechflow-node-xio-device.ts CHANGED

@@ -118,7 +118,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
         this.stream = this.io as unknown as Stream.Duplex
         /*  convert regular stream into object-mode stream  */
-        const wrapper1 = util.createTransformStreamForWritableSide()
+        const wrapper1 = util.createTransformStreamForWritableSide("audio", 1)
         const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
         this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
     }
@@ -161,7 +161,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
         this.stream = this.io as unknown as Stream.Writable
         /*  convert regular stream into object-mode stream  */
-        const wrapper = util.createTransformStreamForWritableSide()
+        const wrapper = util.createTransformStreamForWritableSide("audio", 1)
         this.stream = Stream.compose(wrapper, this.stream)
     }

package/speechflow-cli/src/speechflow-node-xio-file.ts CHANGED

@@ -128,7 +128,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
             }
             /*  convert regular stream into object-mode stream  */
-            const wrapper1 = util.createTransformStreamForWritableSide()
+            const wrapper1 = util.createTransformStreamForWritableSide(this.params.type, 1)
             const wrapper2 = util.createTransformStreamForReadableSide(
                 this.params.type, () => this.timeZero)
             this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
@@ -171,7 +171,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
                 else
                     process.stdout.setEncoding(this.config.textEncoding)
                 const chunker = createStdoutChunker()
-                const wrapper = util.createTransformStreamForWritableSide()
+                const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
                 this.stream = Stream.compose(wrapper, chunker)
             }
             else {
@@ -183,7 +183,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
                 else
                     writable = fs.createWriteStream(this.params.path,
                         { highWaterMark: highWaterMarkText, encoding: this.config.textEncoding })
-                const wrapper = util.createTransformStreamForWritableSide()
+                const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
                 this.stream = Stream.compose(wrapper, writable)
             }
         }

package/speechflow-cli/src/speechflow-node-xio-mqtt.ts CHANGED

@@ -112,6 +112,7 @@ export default class SpeechFlowNodeXIOMQTT extends SpeechFlowNode {
             }
         })
         const self = this
+        const reads = new util.PromiseSet<void>()
         this.stream = new Stream.Duplex({
             writableObjectMode: true,
             readableObjectMode: true,
@@ -134,14 +135,18 @@ export default class SpeechFlowNodeXIOMQTT extends SpeechFlowNode {
                     })
                 }
             },
+            async final (callback) {
+                await reads.awaitAll()
+                callback()
+            },
             read (size: number) {
                 if (self.params.mode === "w")
                     throw new Error("read operation on write-only node")
-                self.chunkQueue!.read().then((chunk) => {
+                reads.add(self.chunkQueue!.read().then((chunk) => {
                     this.push(chunk, "binary")
                 }).catch((err: Error) => {
                     self.log("warning", `read on chunk queue operation failed: ${err}`)
-                })
+                }))
             }
         })
     }

package/speechflow-cli/src/speechflow-node-xio-websocket.ts CHANGED

@@ -109,6 +109,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
                 this.log("error", `error of some connection on URL ${this.params.listen}: ${error.message}`)
             })
             const self = this
+            const reads = new util.PromiseSet<void>()
             this.stream = new Stream.Duplex({
                 writableObjectMode: true,
                 readableObjectMode: true,
@@ -141,14 +142,18 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
                         })
                     }
                 },
+                async final (callback) {
+                    await reads.awaitAll()
+                    callback()
+                },
                 read (size: number) {
                     if (self.params.mode === "w")
                         throw new Error("read operation on write-only node")
-                    chunkQueue.read().then((chunk) => {
+                    reads.add(chunkQueue.read().then((chunk) => {
                         this.push(chunk, "binary")
                     }).catch((err: Error) => {
                         self.log("warning", `read on chunk queue operation failed: ${err}`)
-                    })
+                    }))
                 }
             })
         }
@@ -190,6 +195,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
             })
             this.client.binaryType = "arraybuffer"
             const self = this
+            const reads = new util.PromiseSet<void>()
             this.stream = new Stream.Duplex({
                 writableObjectMode: true,
                 readableObjectMode: true,
@@ -208,14 +214,18 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
                         callback()
                     }
                 },
+                async final (callback) {
+                    await reads.awaitAll()
+                    callback()
+                },
                 read (size: number) {
                     if (self.params.mode === "w")
                         throw new Error("read operation on write-only node")
-                    chunkQueue.read().then((chunk) => {
+                    reads.add(chunkQueue.read().then((chunk) => {
                         this.push(chunk, "binary")
                     }).catch((err: Error) => {
                         self.log("warning", `read on chunk queue operation failed: ${err}`)
-                    })
+                    }))
                 }
             })
         }

package/speechflow-cli/src/speechflow-util-audio.ts CHANGED

@@ -91,7 +91,7 @@ export function convertBufToI16 (buf: Buffer, littleEndian = true) {
     return arr
 }
-/*  helper function: convert In16Array in PCM/I16 to Buffer  */
+/*  helper function: convert Int16Array in PCM/I16 to Buffer  */
 export function convertI16ToBuf (arr: Int16Array, littleEndian = true) {
     if (arr.length === 0)
         return Buffer.alloc(0)
@@ -252,7 +252,7 @@ export class WebAudio {
                 /*  start capture first  */
                 if (this.captureNode !== null) {
-                    this.captureNode?.port.postMessage({
+                    this.captureNode.port.postMessage({
                         type: "start-capture",
                         chunkId,
                         expectedSamples: int16Array.length

package/speechflow-cli/src/speechflow-util-misc.ts ADDED

@@ -0,0 +1,23 @@
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+/*  sleep: wait a duration of time and then resolve  */
+export function sleep (durationMs: number) {
+    return new Promise<void>((resolve, reject) => {
+        setTimeout(() => {
+            resolve()
+        }, durationMs)
+    })
+}
+/*  timeout: wait a duration of time and then reject  */
+export function timeout (durationMs: number) {
+    return new Promise<never>((resolve, reject) => {
+        setTimeout(() => {
+            reject(new Error("timeout"))
+        }, durationMs)
+    })
+}

package/speechflow-cli/src/speechflow-util-queue.ts CHANGED

@@ -35,11 +35,9 @@ export class SingleQueue<T> extends EventEmitter {
     }
     read () {
         return new Promise<T>((resolve, reject) => {
-            const consume = () =>
-                this.queue.length > 0 ? this.queue.pop()! : null
             const tryToConsume = () => {
-                const item = consume()
-                if (item !== null)
+                const item = this.queue.pop()
+                if (item !== undefined)
                     resolve(item)
                 else
                     this.once("dequeue", tryToConsume)
@@ -47,6 +45,11 @@ export class SingleQueue<T> extends EventEmitter {
             tryToConsume()
         })
     }
+    drain () {
+        const items = this.queue
+        this.queue = new Array<T>()
+        return items
+    }
 }
 /*  helper class for double-item queue  */
@@ -67,17 +70,17 @@ export class DoubleQueue<T0, T1> extends EventEmitter {
     }
     read () {
         return new Promise<[ T0, T1 ]>((resolve, reject) => {
-            const consume = (): [ T0, T1 ] | null => {
+            const consume = (): [ T0, T1 ] | undefined => {
                 if (this.queue0.length > 0 && this.queue1.length > 0) {
                     const item0 = this.queue0.pop() as T0
                     const item1 = this.queue1.pop() as T1
                     return [ item0, item1 ]
                 }
-                return null
+                return undefined
             }
             const tryToConsume = () => {
                 const items = consume()
-                if (items !== null)
+                if (items !== undefined)
                     resolve(items)
                 else
                     this.once("dequeue", tryToConsume)
@@ -273,12 +276,12 @@ export class TimeStore<T> extends EventEmitter {
 /*  asynchronous queue  */
 export class AsyncQueue<T> {
-    private queue: Array<T | null> = []
-    private resolvers: ((v: T | null) => void)[] = []
-    write (v: T | null) {
-        const resolve = this.resolvers.shift()
-        if (resolve)
-            resolve(v)
+    private queue: Array<T> = []
+    private resolvers: { resolve: (v: T) => void, reject: (err: Error) => void }[] = []
+    write (v: T) {
+        const resolver = this.resolvers.shift()
+        if (resolver)
+            resolver.resolve(v)
         else
             this.queue.push(v)
     }
@@ -286,11 +289,14 @@ export class AsyncQueue<T> {
         if (this.queue.length > 0)
             return this.queue.shift()!
         else
-            return new Promise<T | null>((resolve) => this.resolvers.push(resolve))
+            return new Promise<T>((resolve, reject) => this.resolvers.push({ resolve, reject }))
+    }
+    empty () {
+        return this.queue.length === 0
     }
     destroy () {
-        for (const resolve of this.resolvers)
-            resolve(null)
+        for (const resolver of this.resolvers)
+            resolver.reject(new Error("AsyncQueue destroyed"))
         this.resolvers = []
         this.queue = []
     }
@@ -318,3 +324,17 @@ export class CachedRegExp {
         return this.cache.size
     }
 }
+/*  set of promises  */
+export class PromiseSet<T> {
+    private promises = new Set<Promise<T>>()
+    add (promise: Promise<T>) {
+        this.promises.add(promise)
+        promise.finally(() => {
+            this.promises.delete(promise)
+        }).catch(() => {})
+    }
+    async awaitAll () {
+        await Promise.all(this.promises)
+    }
+}