npm - speechflow - Versions diffs - 2.0.4 → 2.1.0 - Mend

speechflow 2.0.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/CHANGELOG.md +15 -0
package/README.md +34 -5
package/etc/speechflow.yaml +20 -48
package/etc/stx.conf +2 -2
package/package.json +3 -3
package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.d.ts +1 -0
package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +60 -0
package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js.map +1 -0
package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.d.ts +15 -0
package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +234 -0
package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -0
package/speechflow-cli/dst/speechflow-node-a2a-meter.js +2 -2
package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.d.ts +16 -0
package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.js +275 -0
package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.js.map +1 -0
package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +32 -15
package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +26 -6
package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -1
package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -0
package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +72 -5
package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
package/speechflow-cli/etc/oxlint.jsonc +1 -0
package/speechflow-cli/package.d/sherpa-onnx+1.12.23.patch +12 -0
package/speechflow-cli/package.json +20 -17
package/speechflow-cli/src/lib.d.ts +30 -4
package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +68 -0
package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +219 -0
package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +33 -15
package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +30 -11
package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +86 -10
package/speechflow-ui-db/dst/index.css +1 -1
package/speechflow-ui-db/dst/index.js +13 -13
package/speechflow-ui-db/package.json +12 -11
package/speechflow-ui-db/src/app.vue +62 -17
package/speechflow-ui-st/dst/index.css +1 -1
package/speechflow-ui-st/dst/index.js +32 -32
package/speechflow-ui-st/package.json +13 -12
package/speechflow-ui-st/src/app.vue +9 -8

package/speechflow-cli/src/speechflow-node-a2a-meter.ts CHANGED Viewed

@@ -81,11 +81,11 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
             /*  grab the accumulated chunk data  */
             const chunkData = this.chunkBuffer
-            this.chunkBuffer = new Float32Array(0)
+            this.chunkBuffer = chunkData.subarray(samplesPerChunk)
             /*  update internal audio sample sliding window for LUFS-M  */
             if (chunkData.length > sampleWindow.length)
-                sampleWindow.set(chunkData.subarray(chunkData.length - sampleWindow.length), 0)
+                sampleWindow.set(chunkData.subarray(0, sampleWindow.length), 0)
             else {
                 sampleWindow.set(sampleWindow.subarray(chunkData.length), 0)
                 sampleWindow.set(chunkData, sampleWindow.length - chunkData.length)

package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts CHANGED Viewed

@@ -37,7 +37,8 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
             model:    { type: "string",  val: "nova-2", pos: 0 },
             version:  { type: "string",  val: "latest", pos: 1 },
             language: { type: "string",  val: "multi",  pos: 2 },
-            interim:  { type: "boolean", val: false,    pos: 3 }
+            interim:  { type: "boolean", val: false,    pos: 3 },
+            keywords: { type: "string",  val: "",       pos: 4 }
         })
         /*  sanity check parameters  */
@@ -86,34 +87,51 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
         /*  create a store for the meta information  */
         const metastore = new util.TimeStore<Map<string, any>>()
-        /*  connect to Deepgram API  */
-        const deepgram = Deepgram.createClient(this.params.key)
-        let language = "en"
-        if (this.params.language !== "en") {
-            if (this.params.model.match(/^nova-2/))
-                language = this.params.language
-            else if (this.params.model.match(/^nova-3/))
-                language = "multi"
-        }
-        this.dg = deepgram.listen.live({
+        /*  configure Deepgram connection options  */
+        const options: Deepgram.LiveSchema = {
             mip_opt_out:      true,
             model:            this.params.model,
             version:          this.params.version,
-            language,
             channels:         this.config.audioChannels,
             sample_rate:      this.config.audioSampleRate,
             encoding:         "linear16",
             multichannel:     false,
             endpointing:      false,
             interim_results:  this.params.interim,
-            smart_format:     true,
+            smart_format:     false,
             punctuate:        true,
             filler_words:     true,
-            numerals:         true,
+            numerals:         false,
             diarize:          false,
             profanity_filter: false,
             redact:           false
-        })
+        }
+        const model    = this.params.model    as string
+        const language = this.params.language as string
+        const keywords = this.params.keywords as string
+        if (model.match(/^nova-2/) && language !== "en")
+            options.language = this.params.language
+        else if (model.match(/^nova-3/) && language !== "en")
+            options.language = "multi"
+        else
+            options.language = "en"
+        if (keywords !== "") {
+            if (model.match(/^nova-2/))
+                options.keywords = keywords.split(/(?:\s+|\s*,\s*)/).map((kw) => {
+                    let boost = 2
+                    if (kw.startsWith("-")) {
+                        kw = kw.slice(1)
+                        boost = -4
+                    }
+                    return `${kw}:${boost}`
+                })
+            else if (model.match(/^nova-3/))
+                options.keyterm = keywords.split(/(?:\s+|\s*,\s*)/).join(" ")
+        }
+        /*  connect to Deepgram API  */
+        const deepgram = Deepgram.createClient(this.params.key)
+        this.dg = deepgram.listen.live(options)
         /*  hook onto Deepgram API events  */
         this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {

package/speechflow-cli/src/speechflow-node-t2t-deepl.ts CHANGED Viewed

@@ -63,7 +63,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
         /*  provide text-to-text translation  */
         const translate = async (text: string) => {
-            const src = this.params.src === "en" ? "en-US" : this.params.src
+            const src = this.params.src
             const dst = this.params.dst === "en" ? "en-US" : this.params.dst
             const result = await this.deepl!.translateText(text, src, dst, {
                 splitSentences: "off",

package/speechflow-cli/src/speechflow-node-t2t-profanity.ts CHANGED Viewed

@@ -5,12 +5,13 @@
 */
 /*  standard dependencies  */
-import Stream       from "node:stream"
+import Stream                              from "node:stream"
 /*  external dependencies  */
-import BadWordsNext from "bad-words-next"
-import en           from "bad-words-next/lib/en"
-import de           from "bad-words-next/lib/de"
+import BadWordsNext                        from "bad-words-next"
+import en                                  from "bad-words-next/lib/en"
+import de                                  from "bad-words-next/lib/de"
+import { Profanity, CensorType }           from "@2toad/profanity"
 /*  internal dependencies  */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -31,8 +32,7 @@ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
         /*  declare node configuration parameters  */
         this.configure({
             lang:        { type: "string", val: "en", match: /^(?:en|de)$/ },
-            placeholder: { type: "string", val: "***" },
-            mode:        { type: "string", val: "replace", match: /^(?:replace|repeat)$/ }
+            placeholder: { type: "string", val: "***" }
         })
         /*  declare node input/output format  */
@@ -42,18 +42,37 @@ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
     /*  open node  */
     async open () {
-        /*  create profanity filter instance  */
-        const filter = util.run("creating profanity filter", () =>
+        /*  create profanity filter instances  */
+        const filter1 = util.run("creating profanity filter 1", () =>
             new BadWordsNext({
                 data:            langData[this.params.lang],
                 placeholder:     this.params.placeholder,
-                placeholderMode: this.params.mode as "replace" | "repeat"
+                placeholderMode: "repeat" as "replace" | "repeat"
             })
         )
+        const filter2 = util.run("creating profanity filter 2", () => {
+            const profanity = new Profanity({
+                languages: [ this.params.lang ],
+                grawlix:   this.params.placeholder,
+                wholeWord: true
+            })
+            if (this.params.lang === "de") {
+                /*  improve word-list for german language  */
+                profanity.addWords([ "sex" ])
+                profanity.removeWords([
+                    "verdammt", "glocke", "wahnsinn", "knochen", "fehler", "mist", "phantasievoll",
+                    "huhn", "ziegen", "geil", "lustig", "verzögert", "schrauben", "geschlecht"
+                ])
+            }
+            return profanity
+        })
         /*  apply profanity filtering  */
-        const censor = (text: string): string =>
-            filter.filter(text)
+        const censor = (text: string): string => {
+            text = filter1.filter(text)
+            text = filter2.censor(text, CensorType.Word)
+            return text
+        }
         /*  establish a transform stream and connect it to profanity filtering  */
         this.stream = new Stream.Transform({

package/speechflow-cli/src/speechflow-node-t2t-sentence.ts CHANGED Viewed

@@ -14,13 +14,14 @@ import { Duration }       from "luxon"
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 import * as util                           from "./speechflow-util"
-/*  text stream queue element */
+/*  text stream queue element  */
 type TextQueueElement = {
-    type:         "text-frame",
-    chunk:        SpeechFlowChunk,
-    complete?:    boolean
+    type:      "text-frame",
+    chunk:     SpeechFlowChunk,
+    preview?:  "pending" | "sent",
+    complete?: boolean
 } | {
-    type:         "text-eof"
+    type:      "text-eof"
 }
 /*  SpeechFlow node for sentence splitting  */
@@ -35,13 +36,16 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
     private queueSend  = this.queue.pointerUse("send")
     private closing  = false
     private workingOffTimer: ReturnType<typeof setTimeout> | null = null
+    private previewTimer:    ReturnType<typeof setTimeout> | null = null
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
         super(id, cfg, opts, args)
         /*  declare node configuration parameters  */
-        this.configure({})
+        this.configure({
+            timeout: { type: "number", pos: 0, val: 3 * 1000 }
+        })
         /*  declare node input/output format  */
         this.input  = "text"
@@ -78,6 +82,8 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                     this.queueSplit.walk(+1)
                     break
                 }
+                /*  perform sentence splitting on input chunk  */
                 const chunk = element.chunk
                 const payload = chunk.payload as string
                 const m = payload.match(/^((?:.|\r?\n)+?[.;?!])\s*((?:.|\r?\n)*)$/)
@@ -115,20 +121,33 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                         if (element2 === undefined)
                             break
                         if (element2.type === "text-eof") {
+                            /*  no more chunks: output as final
+                                (perhaps incomplete sentence at end of stream)  */
                             element.complete = true
                             this.queueSplit.touch()
                             this.queueSplit.walk(+1)
                             break
                         }
+                        /*  merge into following chunk  */
                         element2.chunk.timestampStart = element.chunk.timestampStart
                         element2.chunk.payload =
                             (element.chunk.payload  as string) + " " +
                             (element2.chunk.payload as string)
+                        /*  reset preview state (merged content needs new preview)  */
+                        element2.preview = undefined
                         this.queueSplit.delete()
                         this.queueSplit.touch()
                     }
-                    else
+                    else {
+                        /*  no following chunk yet: mark for intermediate preview output  */
+                        if (element.preview !== "sent") {
+                            element.preview = "pending"
+                            this.queueSplit.touch()
+                        }
                         break
+                    }
                 }
             }
@@ -157,8 +176,23 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                     callback(new Error("expected text input as string chunks"))
                 else if (chunk.payload.length === 0)
                     callback()
+                else if (chunk.kind === "intermediate") {
+                    /*  intermediate chunks: pass through immediately (bypass queue)  */
+                    self.log("info", `received text (${chunk.kind}): ${JSON.stringify(chunk.payload)}`)
+                    self.log("info", `send text (intermediate pass-through): ${JSON.stringify(chunk.payload)}`)
+                    this.push(chunk)
+                    callback()
+                }
                 else {
-                    self.log("info", `received text: ${JSON.stringify(chunk.payload)}`)
+                    /*  final chunks: queue for sentence splitting  */
+                    self.log("info", `received text (${chunk.kind}): ${JSON.stringify(chunk.payload)}`)
+                    /*  cancel any pending preview timeout  */
+                    if (self.previewTimer !== null) {
+                        clearTimeout(self.previewTimer)
+                        self.previewTimer = null
+                    }
                     self.queueRecv.append({ type: "text-frame", chunk })
                     callback()
                 }
@@ -192,6 +226,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                     else if (element !== undefined
                         && element.type === "text-frame"
                         && element.complete === true) {
+                        /*  send all consecutive complete chunks  */
                         while (true) {
                             const nextElement = self.queueSend.peek()
                             if (nextElement === undefined)
@@ -204,12 +239,49 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                             else if (nextElement.type === "text-frame"
                                 && nextElement.complete !== true)
                                 break
-                            self.log("info", `send text: ${JSON.stringify(nextElement.chunk.payload)}`)
+                            self.log("info", `send text (${nextElement.chunk.kind}): ${JSON.stringify(nextElement.chunk.payload)}`)
                             this.push(nextElement.chunk)
                             self.queueSend.walk(+1)
                             self.queue.trim()
                         }
                     }
+                    else if (element !== undefined
+                        && element.type === "text-frame"
+                        && element.preview === "pending") {
+                        /*  send intermediate preview (without advancing pointer)  */
+                        const previewChunk = element.chunk.clone()
+                        previewChunk.kind = "intermediate"
+                        self.log("info", `send text (intermediate preview): ${JSON.stringify(previewChunk.payload)}`)
+                        this.push(previewChunk)
+                        element.preview = "sent"
+                        self.queueSend.touch()
+                        /*  start preview timeout (if configured)  */
+                        const timeout = self.params.timeout as number
+                        if (timeout > 0 && self.previewTimer === null) {
+                            self.previewTimer = setTimeout(() => {
+                                self.previewTimer = null
+                                if (self.closing)
+                                    return
+                                /*  promote preview to final chunk  */
+                                const el = self.queueSend.peek()
+                                if (el !== undefined
+                                    && el.type === "text-frame"
+                                    && el.preview === "sent"
+                                    && el.complete !== true) {
+                                    self.log("info", `timeout: promoting intermediate to final: ${JSON.stringify(el.chunk.payload)}`)
+                                    el.complete = true
+                                    self.queueSend.touch()
+                                    self.queue.emit("write")
+                                }
+                            }, timeout)
+                        }
+                        /*  wait for more data  */
+                        if (!self.closing)
+                            self.queue.once("write", flushPendingChunks)
+                    }
                     else if (!self.closing)
                         self.queue.once("write", flushPendingChunks)
                 }
@@ -223,11 +295,15 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
         /*  indicate closing  */
         this.closing = true
-        /*  clean up timer  */
+        /*  clean up timers  */
         if (this.workingOffTimer !== null) {
             clearTimeout(this.workingOffTimer)
             this.workingOffTimer = null
         }
+        if (this.previewTimer !== null) {
+            clearTimeout(this.previewTimer)
+            this.previewTimer = null
+        }
         /*  remove any pending event listeners  */
         this.queue.removeAllListeners("write")