npm - speechflow - Versions diffs - 2.0.3 → 2.1.0 - Mend

speechflow 2.0.3 → 2.1.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (78)

package/speechflow-cli/src/speechflow-node-t2t-opus.ts CHANGED Viewed

@@ -121,17 +121,17 @@ export default class SpeechFlowNodeT2TOPUS extends SpeechFlowNode {
     /*  close node  */
     async close () {
-        /*  shutdown Transformers  */
-        if (this.translator !== null) {
-            this.translator.dispose()
-            this.translator = null
-        }
         /*  shutdown stream  */
         if (this.stream !== null) {
             await util.destroyStream(this.stream)
             this.stream = null
         }
+        /*  shutdown Transformers  */
+        if (this.translator !== null) {
+            this.translator.dispose()
+            this.translator = null
+        }
     }
 }

package/speechflow-cli/src/speechflow-node-t2t-profanity.ts CHANGED Viewed

@@ -5,12 +5,13 @@
 */
 /*  standard dependencies  */
-import Stream       from "node:stream"
+import Stream                              from "node:stream"
 /*  external dependencies  */
-import BadWordsNext from "bad-words-next"
-import en           from "bad-words-next/lib/en"
-import de           from "bad-words-next/lib/de"
+import BadWordsNext                        from "bad-words-next"
+import en                                  from "bad-words-next/lib/en"
+import de                                  from "bad-words-next/lib/de"
+import { Profanity, CensorType }           from "@2toad/profanity"
 /*  internal dependencies  */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -31,8 +32,7 @@ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
         /*  declare node configuration parameters  */
         this.configure({
             lang:        { type: "string", val: "en", match: /^(?:en|de)$/ },
-            placeholder: { type: "string", val: "***" },
-            mode:        { type: "string", val: "replace", match: /^(?:replace|repeat)$/ }
+            placeholder: { type: "string", val: "***" }
         })
         /*  declare node input/output format  */
@@ -42,18 +42,37 @@ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
     /*  open node  */
     async open () {
-        /*  create profanity filter instance  */
-        const filter = util.run("creating profanity filter", () =>
+        /*  create profanity filter instances  */
+        const filter1 = util.run("creating profanity filter 1", () =>
             new BadWordsNext({
                 data:            langData[this.params.lang],
                 placeholder:     this.params.placeholder,
-                placeholderMode: this.params.mode as "replace" | "repeat"
+                placeholderMode: "repeat" as "replace" | "repeat"
             })
         )
+        const filter2 = util.run("creating profanity filter 2", () => {
+            const profanity = new Profanity({
+                languages: [ this.params.lang ],
+                grawlix:   this.params.placeholder,
+                wholeWord: true
+            })
+            if (this.params.lang === "de") {
+                /*  improve word-list for german language  */
+                profanity.addWords([ "sex" ])
+                profanity.removeWords([
+                    "verdammt", "glocke", "wahnsinn", "knochen", "fehler", "mist", "phantasievoll",
+                    "huhn", "ziegen", "geil", "lustig", "verzögert", "schrauben", "geschlecht"
+                ])
+            }
+            return profanity
+        })
         /*  apply profanity filtering  */
-        const censor = (text: string): string =>
-            filter.filter(text)
+        const censor = (text: string): string => {
+            text = filter1.filter(text)
+            text = filter2.censor(text, CensorType.Word)
+            return text
+        }
         /*  establish a transform stream and connect it to profanity filtering  */
         this.stream = new Stream.Transform({

package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts CHANGED Viewed

@@ -141,7 +141,7 @@ export default class SpeechFlowNodeT2TPunctuation extends SpeechFlowNode {
         await this.llm.open()
         /*  provide text-to-text punctuation restoration  */
-        const llm = this.llm!
+        const llm = this.llm
         const punctuate = async (text: string) => {
             const cfg = this.setup[this.params.lang]
             if (!cfg)

package/speechflow-cli/src/speechflow-node-t2t-sentence.ts CHANGED Viewed

@@ -14,13 +14,14 @@ import { Duration }       from "luxon"
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 import * as util                           from "./speechflow-util"
-/*  text stream queue element */
+/*  text stream queue element  */
 type TextQueueElement = {
-    type:         "text-frame",
-    chunk:        SpeechFlowChunk,
-    complete?:    boolean
+    type:      "text-frame",
+    chunk:     SpeechFlowChunk,
+    preview?:  "pending" | "sent",
+    complete?: boolean
 } | {
-    type:         "text-eof"
+    type:      "text-eof"
 }
 /*  SpeechFlow node for sentence splitting  */
@@ -35,13 +36,16 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
     private queueSend  = this.queue.pointerUse("send")
     private closing  = false
     private workingOffTimer: ReturnType<typeof setTimeout> | null = null
+    private previewTimer:    ReturnType<typeof setTimeout> | null = null
     /*  construct node  */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
         super(id, cfg, opts, args)
         /*  declare node configuration parameters  */
-        this.configure({})
+        this.configure({
+            timeout: { type: "number", pos: 0, val: 3 * 1000 }
+        })
         /*  declare node input/output format  */
         this.input  = "text"
@@ -78,6 +82,8 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                     this.queueSplit.walk(+1)
                     break
                 }
+                /*  perform sentence splitting on input chunk  */
                 const chunk = element.chunk
                 const payload = chunk.payload as string
                 const m = payload.match(/^((?:.|\r?\n)+?[.;?!])\s*((?:.|\r?\n)*)$/)
@@ -115,20 +121,33 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                         if (element2 === undefined)
                             break
                         if (element2.type === "text-eof") {
+                            /*  no more chunks: output as final
+                                (perhaps incomplete sentence at end of stream)  */
                             element.complete = true
                             this.queueSplit.touch()
                             this.queueSplit.walk(+1)
                             break
                         }
+                        /*  merge into following chunk  */
                         element2.chunk.timestampStart = element.chunk.timestampStart
                         element2.chunk.payload =
                             (element.chunk.payload  as string) + " " +
                             (element2.chunk.payload as string)
+                        /*  reset preview state (merged content needs new preview)  */
+                        element2.preview = undefined
                         this.queueSplit.delete()
                         this.queueSplit.touch()
                     }
-                    else
+                    else {
+                        /*  no following chunk yet: mark for intermediate preview output  */
+                        if (element.preview !== "sent") {
+                            element.preview = "pending"
+                            this.queueSplit.touch()
+                        }
                         break
+                    }
                 }
             }
@@ -157,8 +176,23 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                     callback(new Error("expected text input as string chunks"))
                 else if (chunk.payload.length === 0)
                     callback()
+                else if (chunk.kind === "intermediate") {
+                    /*  intermediate chunks: pass through immediately (bypass queue)  */
+                    self.log("info", `received text (${chunk.kind}): ${JSON.stringify(chunk.payload)}`)
+                    self.log("info", `send text (intermediate pass-through): ${JSON.stringify(chunk.payload)}`)
+                    this.push(chunk)
+                    callback()
+                }
                 else {
-                    self.log("info", `received text: ${JSON.stringify(chunk.payload)}`)
+                    /*  final chunks: queue for sentence splitting  */
+                    self.log("info", `received text (${chunk.kind}): ${JSON.stringify(chunk.payload)}`)
+                    /*  cancel any pending preview timeout  */
+                    if (self.previewTimer !== null) {
+                        clearTimeout(self.previewTimer)
+                        self.previewTimer = null
+                    }
                     self.queueRecv.append({ type: "text-frame", chunk })
                     callback()
                 }
@@ -192,6 +226,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                     else if (element !== undefined
                         && element.type === "text-frame"
                         && element.complete === true) {
+                        /*  send all consecutive complete chunks  */
                         while (true) {
                             const nextElement = self.queueSend.peek()
                             if (nextElement === undefined)
@@ -204,12 +239,49 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
                             else if (nextElement.type === "text-frame"
                                 && nextElement.complete !== true)
                                 break
-                            self.log("info", `send text: ${JSON.stringify(nextElement.chunk.payload)}`)
+                            self.log("info", `send text (${nextElement.chunk.kind}): ${JSON.stringify(nextElement.chunk.payload)}`)
                             this.push(nextElement.chunk)
                             self.queueSend.walk(+1)
                             self.queue.trim()
                         }
                     }
+                    else if (element !== undefined
+                        && element.type === "text-frame"
+                        && element.preview === "pending") {
+                        /*  send intermediate preview (without advancing pointer)  */
+                        const previewChunk = element.chunk.clone()
+                        previewChunk.kind = "intermediate"
+                        self.log("info", `send text (intermediate preview): ${JSON.stringify(previewChunk.payload)}`)
+                        this.push(previewChunk)
+                        element.preview = "sent"
+                        self.queueSend.touch()
+                        /*  start preview timeout (if configured)  */
+                        const timeout = self.params.timeout as number
+                        if (timeout > 0 && self.previewTimer === null) {
+                            self.previewTimer = setTimeout(() => {
+                                self.previewTimer = null
+                                if (self.closing)
+                                    return
+                                /*  promote preview to final chunk  */
+                                const el = self.queueSend.peek()
+                                if (el !== undefined
+                                    && el.type === "text-frame"
+                                    && el.preview === "sent"
+                                    && el.complete !== true) {
+                                    self.log("info", `timeout: promoting intermediate to final: ${JSON.stringify(el.chunk.payload)}`)
+                                    el.complete = true
+                                    self.queueSend.touch()
+                                    self.queue.emit("write")
+                                }
+                            }, timeout)
+                        }
+                        /*  wait for more data  */
+                        if (!self.closing)
+                            self.queue.once("write", flushPendingChunks)
+                    }
                     else if (!self.closing)
                         self.queue.once("write", flushPendingChunks)
                 }
@@ -223,11 +295,15 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
         /*  indicate closing  */
         this.closing = true
-        /*  clean up timer  */
+        /*  clean up timers  */
         if (this.workingOffTimer !== null) {
             clearTimeout(this.workingOffTimer)
             this.workingOffTimer = null
         }
+        if (this.previewTimer !== null) {
+            clearTimeout(this.previewTimer)
+            this.previewTimer = null
+        }
         /*  remove any pending event listeners  */
         this.queue.removeAllListeners("write")

package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts CHANGED Viewed

@@ -128,7 +128,7 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
         await this.llm.open()
         /*  provide text-to-text spellchecking  */
-        const llm = this.llm!
+        const llm = this.llm
         const spellcheck = async (text: string) => {
             const cfg = this.setup[this.params.lang]
             if (!cfg)

package/speechflow-cli/src/speechflow-node-t2t-summary.ts CHANGED Viewed

@@ -127,7 +127,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
         await this.llm.open()
         /*  provide text summarization  */
-        const llm = this.llm!
+        const llm = this.llm
         const summarize = async (text: string) => {
             const cfg = this.setup[this.params.lang]
             if (!cfg)

package/speechflow-cli/src/speechflow-node-t2t-translate.ts CHANGED Viewed

@@ -13,7 +13,7 @@ import * as util                           from "./speechflow-util"
 import { LLM, type LLMCompleteMessage }    from "./speechflow-util-llm"
 /*  internal utility types  */
-type ConfigEntry = { systemPrompt: string, chat: LLMCompleteMessage[] }
+type ConfigEntry = { systemPrompt: { [ type: string ]: string }, chat: LLMCompleteMessage[] }
 type Config      = { [ key: string ]: ConfigEntry }
 /*  SpeechFlow node for LLM-based text-to-text translation  */
@@ -28,19 +28,30 @@ export default class SpeechFlowNodeT2TTranslate extends SpeechFlowNode {
     private setup: Config = {
         /*  English (EN) to German (DE) translation  */
         "en-de": {
-            systemPrompt:
-                "You are a translator.\n" +
-                "Output only the requested text.\n" +
-                "Do not use markdown.\n" +
-                "Do not chat.\n" +
-                "Do not show any explanations.\n" +
-                "Do not show any introduction.\n" +
-                "Do not show any preamble.\n" +
-                "Do not show any prolog.\n" +
-                "Do not show any epilog.\n" +
-                "Get to the point.\n" +
-                "Preserve the original meaning, tone, and nuance.\n" +
-                "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
+            systemPrompt: {
+                "any":
+                    "You are a translator.\n" +
+                    "Output only the requested text.\n" +
+                    "Do not use markdown.\n" +
+                    "Do not chat.\n" +
+                    "Do not show any explanations.\n" +
+                    "Do not show any introduction.\n" +
+                    "Do not show any preamble.\n" +
+                    "Do not show any prolog.\n" +
+                    "Do not show any epilog.\n" +
+                    "Get to the point.\n" +
+                    "Preserve the original meaning, tone, and nuance.\n" +
+                    "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
+                "translategemma":
+                    /*  ATTENTION: do not change this prompt, as TranslateGemma requires this fixed format!  */
+                    "You are a professional English (en) to German (de) translator. " +
+                    "Your goal is to accurately convey the meaning and nuances of the original " +
+                    "English text while adhering to German grammar, vocabulary, and cultural sensitivities. " +
+                    "Produce only the German translation, without any additional explanations or commentary. " +
+                    "Please translate the following English text into German:\n" +
+                    "\n" +
+                    "\n"
+            },
             chat: [
                 { role: "user",      content: "I love my wife." },
                 { role: "assistant", content: "Ich liebe meine Frau." },
@@ -53,19 +64,30 @@ export default class SpeechFlowNodeT2TTranslate extends SpeechFlowNode {
         /*  German (DE) to English (EN) translation  */
         "de-en": {
-            systemPrompt:
-                "You are a translator.\n" +
-                "Output only the requested text.\n" +
-                "Do not use markdown.\n" +
-                "Do not chat.\n" +
-                "Do not show any explanations.\n" +
-                "Do not show any introduction.\n" +
-                "Do not show any preamble.\n" +
-                "Do not show any prolog.\n" +
-                "Do not show any epilog.\n" +
-                "Get to the point.\n" +
-                "Preserve the original meaning, tone, and nuance.\n" +
-                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
+            systemPrompt: {
+                "any":
+                    "You are a translator.\n" +
+                    "Output only the requested text.\n" +
+                    "Do not use markdown.\n" +
+                    "Do not chat.\n" +
+                    "Do not show any explanations.\n" +
+                    "Do not show any introduction.\n" +
+                    "Do not show any preamble.\n" +
+                    "Do not show any prolog.\n" +
+                    "Do not show any epilog.\n" +
+                    "Get to the point.\n" +
+                    "Preserve the original meaning, tone, and nuance.\n" +
+                    "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
+                "translategemma":
+                    /*  ATTENTION: do not change this prompt, as TranslateGemma requires this fixed format!  */
+                    "You are a professional German (de) to English (en) translator. " +
+                    "Your goal is to accurately convey the meaning and nuances of the original " +
+                    "German text while adhering to English grammar, vocabulary, and cultural sensitivities. " +
+                    "Produce only the English translation, without any additional explanations or commentary. " +
+                    "Please translate the following German text into English:\n" +
+                    "\n" +
+                    "\n"
+            },
             chat: [
                 { role: "user",      content: "Ich liebe meine Frau." },
                 { role: "assistant", content: "I love my wife." },
@@ -120,14 +142,17 @@ export default class SpeechFlowNodeT2TTranslate extends SpeechFlowNode {
         await this.llm.open()
         /*  provide text-to-text translation  */
-        const llm = this.llm!
+        const llm = this.llm
         const translate = async (text: string) => {
             const key = `${this.params.src}-${this.params.dst}`
             const cfg = this.setup[key]
             if (!cfg)
                 throw new Error(`unsupported language pair: ${key}`)
+            let systemPrompt = cfg.systemPrompt["any"]
+            if (this.params.model.match(/^translategemma/))
+                systemPrompt = cfg.systemPrompt["translategemma"]
             return llm.complete({
-                system:   cfg.systemPrompt,
+                system:   systemPrompt,
                 messages: cfg.chat,
                 prompt:   text
             })