npm - speechflow - Versions diffs - 0.9.8 → 0.9.9 - Mend

speechflow 0.9.8 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/CHANGELOG.md +10 -0
package/LICENSE.txt +674 -0
package/README.md +66 -16
package/dst/speechflow-node-a2a-vad.d.ts +16 -0
package/dst/speechflow-node-a2a-vad.js +431 -0
package/dst/speechflow-node-t2a-kokoro.d.ts +13 -0
package/dst/speechflow-node-t2a-kokoro.js +147 -0
package/dst/speechflow-node-t2t-gemma.js +23 -3
package/dst/speechflow-node-t2t-ollama.d.ts +13 -0
package/dst/speechflow-node-t2t-ollama.js +245 -0
package/dst/speechflow-node-t2t-openai.d.ts +13 -0
package/dst/speechflow-node-t2t-openai.js +225 -0
package/dst/speechflow-node-t2t-opus.js +1 -1
package/dst/speechflow-node-t2t-transformers.d.ts +14 -0
package/dst/speechflow-node-t2t-transformers.js +260 -0
package/dst/speechflow-node-x2x-trace.js +2 -2
package/dst/speechflow.js +86 -40
package/etc/speechflow.yaml +9 -2
package/etc/stx.conf +1 -1
package/package.json +7 -6
package/src/speechflow-node-t2a-kokoro.ts +160 -0
package/src/{speechflow-node-t2t-gemma.ts → speechflow-node-t2t-ollama.ts} +44 -10
package/src/speechflow-node-t2t-openai.ts +246 -0
package/src/speechflow-node-t2t-transformers.ts +244 -0
package/src/speechflow-node-x2x-trace.ts +2 -2
package/src/speechflow.ts +86 -40
package/src/speechflow-node-t2t-opus.ts +0 -111

package/src/speechflow-node-t2t-openai.ts ADDED Viewed

@@ -0,0 +1,246 @@
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+/*  standard dependencies  */
+import Stream           from "node:stream"
+/*  external dependencies  */
+import OpenAI           from "openai"
+/*  internal dependencies  */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+/*  internal utility types  */
+type ConfigEntry = { systemPrompt: string, chat: OpenAI.ChatCompletionMessageParam[] }
+type Config      = { [ key: string ]: ConfigEntry }
+/*  SpeechFlow node for OpenAI/GPT text-to-text processing:
+    translates text between German (DE) and English (EN), or spellchecks
+    the text if source and destination language are the same  */
+export default class SpeechFlowNodeOpenAI extends SpeechFlowNode {
+    /*  declare official node name  */
+    public static name = "openai"
+    /*  internal state: OpenAI API client (set by open(), reset by close())  */
+    private openai: OpenAI | null = null
+    /*  internal LLM setup: system prompt and few-shot example dialog,
+        keyed by "<src>-<dst>"; the example answers carry the "assistant"
+        role, as they demonstrate model replies and are not instructions  */
+    private setup: Config = {
+        /*  English (EN) spellchecking only  */
+        "en-en": {
+            systemPrompt:
+                "You are a proofreader and spellchecker for English.\n" +
+                "Output only the corrected text.\n" +
+                "Do NOT use markdown.\n" +
+                "Do NOT give any explanations.\n" +
+                "Do NOT give any introduction.\n" +
+                "Do NOT give any comments.\n" +
+                "Do NOT give any preamble.\n" +
+                "Do NOT give any prolog.\n" +
+                "Do NOT give any epilog.\n" +
+                "Do NOT change the grammar.\n" +
+                "Do NOT use synonyms for words.\n" +
+                "Keep all words.\n" +
+                "Fill in missing commas.\n" +
+                "Fill in missing points.\n" +
+                "Fill in missing question marks.\n" +
+                "Fill in missing hyphens.\n" +
+                "Focus ONLY on the word spelling.\n" +
+                "The text you have to correct is:\n",
+            chat: [
+                { role: "user",      content: "I luve my wyfe" },
+                { role: "assistant", content: "I love my wife." },
+                { role: "user",      content: "The weether is wunderfull!" },
+                { role: "assistant", content: "The weather is wonderful!" },
+                { role: "user",      content: "The live awesome but I'm hungry." },
+                { role: "assistant", content: "The live is awesome, but I'm hungry." }
+            ]
+        },
+        /*  German (DE) spellchecking only  */
+        "de-de": {
+            systemPrompt:
+                "Du bist ein Korrekturleser und Rechtschreibprüfer für Deutsch.\n" +
+                "Gib nur den korrigierten Text aus.\n" +
+                "Benutze KEIN Markdown.\n" +
+                "Gib KEINE Erklärungen.\n" +
+                "Gib KEINE Einleitung.\n" +
+                "Gib KEINE Kommentare.\n" +
+                "Gib KEINE Preamble.\n" +
+                "Gib KEINEN Prolog.\n" +
+                "Gib KEINEN Epilog.\n" +
+                "Ändere NICHT die Grammatik.\n" +
+                "Verwende KEINE Synonyme für Wörter.\n" +
+                "Behalte alle Wörter bei.\n" +
+                "Füge fehlende Kommas ein.\n" +
+                "Füge fehlende Punkte ein.\n" +
+                "Füge fehlende Fragezeichen ein.\n" +
+                "Füge fehlende Bindestriche ein.\n" +
+                "Füge fehlende Gedankenstriche ein.\n" +
+                "Fokussiere dich NUR auf die Rechtschreibung der Wörter.\n" +
+                "Der von dir zu korrigierende Text ist:\n",
+            chat: [
+                { role: "user",      content: "Ich ljebe meine Frao" },
+                { role: "assistant", content: "Ich liebe meine Frau." },
+                { role: "user",      content: "Die Wedter ist wunderschoen." },
+                { role: "assistant", content: "Das Wetter ist wunderschön." },
+                { role: "user",      content: "Das Leben einfach großartig aber ich bin hungrig." },
+                { role: "assistant", content: "Das Leben ist einfach großartig, aber ich bin hungrig." }
+            ]
+        },
+        /*  English (EN) to German (DE) translation  */
+        "en-de": {
+            systemPrompt:
+                "You are a translator.\n" +
+                "Output only the requested text.\n" +
+                "Do not use markdown.\n" +
+                "Do not chat.\n" +
+                "Do not show any explanations.\n" +
+                "Do not show any introduction.\n" +
+                "Do not show any preamble.\n" +
+                "Do not show any prolog.\n" +
+                "Do not show any epilog.\n" +
+                "Get to the point.\n" +
+                "Preserve the original meaning, tone, and nuance.\n" +
+                "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
+            chat: [
+                { role: "user",      content: "I love my wife." },
+                { role: "assistant", content: "Ich liebe meine Frau." },
+                { role: "user",      content: "The weather is wonderful." },
+                { role: "assistant", content: "Das Wetter ist wunderschön." },
+                { role: "user",      content: "The life is awesome." },
+                { role: "assistant", content: "Das Leben ist einfach großartig." }
+            ]
+        },
+        /*  German (DE) to English (EN) translation  */
+        "de-en": {
+            systemPrompt:
+                "You are a translator.\n" +
+                "Output only the requested text.\n" +
+                "Do not use markdown.\n" +
+                "Do not chat.\n" +
+                "Do not show any explanations. \n" +
+                "Do not show any introduction.\n" +
+                "Do not show any preamble. \n" +
+                "Do not show any prolog. \n" +
+                "Do not show any epilog. \n" +
+                "Get to the point.\n" +
+                "Preserve the original meaning, tone, and nuance.\n" +
+                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
+            chat: [
+                { role: "user",      content: "Ich liebe meine Frau." },
+                { role: "assistant", content: "I love my wife." },
+                { role: "user",      content: "Das Wetter ist wunderschön." },
+                { role: "assistant", content: "The weather is wonderful." },
+                { role: "user",      content: "Das Leben ist einfach großartig." },
+                { role: "assistant", content: "The life is awesome." }
+            ]
+        }
+    }
+    /*  construct node  */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+        /*  declare node configuration parameters
+            (the "api" pattern accepts any HTTP(S) base URL, as the default
+            URL carries a path and no explicit port)  */
+        this.configure({
+            src:   { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/   },
+            dst:   { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/   },
+            key:   { type: "string", val: process.env.SPEECHFLOW_KEY_OPENAI },
+            api:   { type: "string", val: "https://api.openai.com/v1", match: /^https?:\/\/.+/ },
+            model: { type: "string", val: "gpt-4o-mini" }
+        })
+        /*  tell effective mode  */
+        if (this.params.src === this.params.dst)
+            this.log("info", `OpenAI: operation mode: spellchecking for language "${this.params.src}"`)
+        else
+            this.log("info", `OpenAI: operation mode: translation from language "${this.params.src}"` +
+                ` to language "${this.params.dst}"`)
+        /*  declare node input/output format  */
+        this.input  = "text"
+        this.output = "text"
+    }
+    /*  open node: create the API client and the text-to-text transform stream  */
+    async open () {
+        /*  instantiate OpenAI API  */
+        this.openai = new OpenAI({
+            baseURL: this.params.api,
+            apiKey:  this.params.key,
+            dangerouslyAllowBrowser: true
+        })
+        /*  provide text-to-text translation  */
+        const translate = async (text: string) => {
+            /*  select prompt setup for the configured language pair  */
+            const key = `${this.params.src}-${this.params.dst}`
+            const cfg = this.setup[key]
+            /*  send system prompt, few-shot examples and the actual text  */
+            const stream = this.openai!.chat.completions.stream({
+                stream:                true,
+                model:                 this.params.model,
+                seed:                  null,
+                temperature:           0.7,
+                n:                     1,
+                messages: [
+                    { role: "system", content: cfg.systemPrompt },
+                    ...cfg.chat,
+                    { role: "user", content: text }
+                ]
+            })
+            const completion = await stream.finalChatCompletion()
+            if (!stream.ended)
+                stream.abort()
+            /*  the message content is nullable, so fail explicitly
+                instead of forwarding a non-string payload downstream  */
+            const translation = completion.choices[0]?.message.content
+            if (typeof translation !== "string")
+                throw new Error("OpenAI API returned no text content")
+            return translation
+        }
+        /*  establish a duplex stream and connect it to OpenAI  */
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings:      false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    if (chunk.payload === "") {
+                        /*  pass-through empty text without calling the API  */
+                        this.push(chunk)
+                        callback()
+                    }
+                    else {
+                        /*  emit a clone of the chunk carrying the processed text  */
+                        translate(chunk.payload).then((payload) => {
+                            const chunkNew = chunk.clone()
+                            chunkNew.payload = payload
+                            this.push(chunkNew)
+                            callback()
+                        }).catch((err) => {
+                            callback(err)
+                        })
+                    }
+                }
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
+    }
+    /*  close node  */
+    async close () {
+        /*  close stream  */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+        /*  shutdown OpenAI  */
+        this.openai = null
+    }
+}

package/src/speechflow-node-t2t-transformers.ts ADDED Viewed

@@ -0,0 +1,244 @@
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+/*  standard dependencies  */
+import path             from "node:path"
+import Stream           from "node:stream"
+/*  external dependencies  */
+import * as Transformers from "@huggingface/transformers"
+/*  internal dependencies  */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+/*  internal utility types  */
+type ConfigEntry = { systemPrompt: string, chat: Array<{ role: string, content: string }> }
+type Config      = { [ key: string ]: ConfigEntry }
+/*  SpeechFlow node for Transformers text-to-text translation:
+    translates text between German (DE) and English (EN) with either
+    an OPUS translation pipeline or a SmolLM3 text-generation pipeline  */
+export default class SpeechFlowNodeTransformers extends SpeechFlowNode {
+    /*  declare official node name  */
+    public static name = "transformers"
+    /*  internal state: at most one of the pipelines is set by open()  */
+    private translator: Transformers.TranslationPipeline    | null = null
+    private generator:  Transformers.TextGenerationPipeline | null = null
+    /*  internal LLM setup: system prompt and few-shot example dialog,
+        keyed by "<model>:<src>-<dst>" (used by the SmolLM3 model only)  */
+    private setup: Config = {
+        /*  SmolLM3: English (EN) to German (DE) translation  */
+        "SmolLM3:en-de": {
+            systemPrompt:
+                "/no_think\n" +
+                "You are a translator.\n" +
+                "Output only the requested text.\n" +
+                "Do not use markdown.\n" +
+                "Do not chat.\n" +
+                "Do not show any explanations.\n" +
+                "Do not show any introduction.\n" +
+                "Do not show any preamble.\n" +
+                "Do not show any prolog.\n" +
+                "Do not show any epilog.\n" +
+                "Get to the point.\n" +
+                "Preserve the original meaning, tone, and nuance.\n" +
+                "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
+            chat: [
+                { role: "user",   content: "I love my wife." },
+                { role: "assistant", content: "Ich liebe meine Frau." },
+                { role: "user",   content: "The weather is wonderful." },
+                { role: "assistant", content: "Das Wetter ist wunderschön." },
+                { role: "user",   content: "The live is awesome." },
+                { role: "assistant", content: "Das Leben ist einfach großartig." }
+            ]
+        },
+        /*  SmolLM3: German (DE) to English (EN) translation  */
+        "SmolLM3:de-en": {
+            systemPrompt:
+                "/no_think\n" +
+                "You are a translator.\n" +
+                "Output only the requested text.\n" +
+                "Do not use markdown.\n" +
+                "Do not chat.\n" +
+                "Do not show any explanations.\n" +
+                "Do not show any introduction.\n" +
+                "Do not show any preamble. \n" +
+                "Do not show any prolog. \n" +
+                "Do not show any epilog. \n" +
+                "Get to the point.\n" +
+                "Preserve the original meaning, tone, and nuance.\n" +
+                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
+            chat: [
+                { role: "user",   content: "Ich liebe meine Frau." },
+                { role: "assistant", content: "I love my wife." },
+                { role: "user",   content: "Das Wetter ist wunderschön." },
+                { role: "assistant", content: "The weather is wonderful." },
+                { role: "user",   content: "Das Leben ist einfach großartig." },
+                { role: "assistant", content: "The live is awesome." }
+            ]
+        }
+    }
+    /*  construct node (throws if source and destination language are equal)  */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+        /*  declare node configuration parameters  */
+        this.configure({
+            src:   { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
+            dst:   { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
+            model: { type: "string", val: "OPUS", match: /^(?:OPUS|SmolLM3)$/ }
+        })
+        /*  sanity check parameters  */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination languages cannot be the same")
+        /*  declare node input/output format  */
+        this.input  = "text"
+        this.output = "text"
+    }
+    /*  open node: load the model pipeline and create the translation stream  */
+    async open () {
+        /*  track download progress (in percent, 0..100) per model artifact  */
+        let model: string = ""
+        const progressState = new Map<string, number>()
+        const progressCallback = (progress: any) => {
+            let artifact = model
+            if (typeof progress.file === "string")
+                artifact += `:${progress.file}`
+            let percent = 0
+            if (typeof progress.loaded === "number" && typeof progress.total === "number") {
+                /*  avoid NaN/Infinity for a zero total  */
+                if (progress.total > 0)
+                    percent = (progress.loaded / progress.total) * 100
+            }
+            else if (typeof progress.progress === "number")
+                percent = progress.progress
+            if (percent > 0)
+                progressState.set(artifact, percent)
+        }
+        /*  report progress once per second and forget completed artifacts  */
+        const interval = setInterval(() => {
+            for (const [ artifact, percent ] of progressState) {
+                this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
+                if (percent >= 100.0)
+                    progressState.delete(artifact)
+            }
+        }, 1000)
+        /*  instantiate Transformers engine and model
+            (the reporting timer is stopped even if instantiation fails)  */
+        try {
+            if (this.params.model === "OPUS") {
+                model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
+                this.translator = await Transformers.pipeline("translation", model, {
+                    cache_dir: path.join(this.config.cacheDir, "opus"),
+                    dtype:     "q4",
+                    device:    "gpu",
+                    progress_callback: progressCallback
+                })
+                if (this.translator === null)
+                    throw new Error("failed to instantiate translator pipeline")
+            }
+            else if (this.params.model === "SmolLM3") {
+                model = "HuggingFaceTB/SmolLM3-3B-ONNX"
+                this.generator = await Transformers.pipeline("text-generation", model, {
+                    cache_dir: path.join(this.config.cacheDir, "transformers"),
+                    dtype:     "q4",
+                    device:    "gpu",
+                    progress_callback: progressCallback
+                })
+                if (this.generator === null)
+                    throw new Error("failed to instantiate generator pipeline")
+            }
+            else
+                throw new Error("invalid model")
+        }
+        finally {
+            clearInterval(interval)
+        }
+        /*  provide text-to-text translation  */
+        const translate = async (text: string) => {
+            if (this.params.model === "OPUS") {
+                /*  dedicated translation model: pass text directly  */
+                const result = await this.translator!(text)
+                return Array.isArray(result) ?
+                    (result[0] as Transformers.TranslationSingle).translation_text :
+                    (result as Transformers.TranslationSingle).translation_text
+            }
+            else if (this.params.model === "SmolLM3") {
+                /*  generic LLM: send system prompt, few-shot examples and the actual text  */
+                const key = `SmolLM3:${this.params.src}-${this.params.dst}`
+                const cfg = this.setup[key]
+                const messages = [
+                    { role: "system", content: cfg.systemPrompt },
+                    ...cfg.chat,
+                    { role: "user", content: text }
+                ]
+                const result = await this.generator!(messages, {
+                    max_new_tokens: 100,
+                    temperature:    0.6,
+                    top_p:          0.95,
+                    streamer: new Transformers.TextStreamer(this.generator!.tokenizer, {
+                        skip_prompt:         true,
+                        skip_special_tokens: true
+                    })
+                })
+                /*  the generated text is either a plain string or a message
+                    list, of which the last message is taken as the response  */
+                const generatedText = Array.isArray(result) ?
+                    (result[0] as Transformers.TextGenerationSingle).generated_text :
+                    (result as Transformers.TextGenerationSingle).generated_text
+                const response = typeof generatedText === "string" ?
+                    generatedText :
+                    generatedText.at(-1)!.content
+                return response
+            }
+            else
+                throw new Error("invalid model")
+        }
+        /*  establish a duplex stream and connect it to Transformers  */
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings:      false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    if (chunk.payload === "") {
+                        /*  pass-through empty text without running the model  */
+                        this.push(chunk)
+                        callback()
+                    }
+                    else {
+                        /*  emit a clone of the chunk carrying the translated text  */
+                        translate(chunk.payload).then((payload) => {
+                            const chunkNew = chunk.clone()
+                            chunkNew.payload = payload
+                            this.push(chunkNew)
+                            callback()
+                        }).catch((err) => {
+                            callback(err)
+                        })
+                    }
+                }
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
+    }
+    /*  close node  */
+    async close () {
+        /*  close stream  */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+        /*  shutdown Transformers  */
+        if (this.translator !== null) {
+            this.translator.dispose()
+            this.translator = null
+        }
+        if (this.generator !== null) {
+            this.generator.dispose()
+            this.generator = null
+        }
+    }
+}

package/src/speechflow-node-x2x-trace.ts CHANGED Viewed

@@ -52,7 +52,7 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
                 const fmt = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
                 if (Buffer.isBuffer(chunk.payload)) {
                     if (type === "audio")
-                        log("info", `writing ${type} chunk: start=${fmt(chunk.timestampStart)} ` +
+                        log("debug", `writing ${type} chunk: start=${fmt(chunk.timestampStart)} ` +
                             `end=${fmt(chunk.timestampEnd)} kind=${chunk.kind} type=${chunk.type} ` +
                             `payload-type=Buffer payload-bytes=${chunk.payload.byteLength}`)
                     else
@@ -60,7 +60,7 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
                 }
                 else {
                     if (type === "text")
-                        log("info", `writing ${type} chunk: start=${fmt(chunk.timestampStart)} ` +
+                        log("debug", `writing ${type} chunk: start=${fmt(chunk.timestampStart)} ` +
                             `end=${fmt(chunk.timestampEnd)} kind=${chunk.kind} type=${chunk.type}` +
                             `payload-type=String payload-length=${chunk.payload.length} ` +
                             `payload-encoding=${encoding} payload-content="${chunk.payload.toString()}"`)