speechflow 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -0
- package/dst/speechflow-node-deepgram.d.ts +10 -0
- package/dst/speechflow-node-deepgram.js +44 -23
- package/dst/speechflow-node-deepl.d.ts +10 -0
- package/dst/speechflow-node-deepl.js +30 -12
- package/dst/speechflow-node-device.d.ts +11 -0
- package/dst/speechflow-node-device.js +73 -14
- package/dst/speechflow-node-elevenlabs.d.ts +10 -0
- package/dst/speechflow-node-elevenlabs.js +14 -2
- package/dst/speechflow-node-ffmpeg.d.ts +11 -0
- package/dst/speechflow-node-ffmpeg.js +114 -0
- package/dst/speechflow-node-file.d.ts +9 -0
- package/dst/speechflow-node-file.js +71 -13
- package/dst/speechflow-node-gemma.d.ts +11 -0
- package/dst/speechflow-node-gemma.js +152 -0
- package/dst/speechflow-node-websocket.d.ts +11 -0
- package/dst/speechflow-node-websocket.js +34 -6
- package/dst/speechflow-node.d.ts +38 -0
- package/dst/speechflow-node.js +28 -10
- package/dst/speechflow.d.ts +1 -0
- package/dst/speechflow.js +128 -43
- package/etc/tsconfig.json +2 -0
- package/package.json +25 -11
- package/src/speechflow-node-deepgram.ts +55 -24
- package/src/speechflow-node-deepl.ts +38 -16
- package/src/speechflow-node-device.ts +88 -14
- package/src/speechflow-node-elevenlabs.ts +19 -2
- package/src/speechflow-node-ffmpeg.ts +122 -0
- package/src/speechflow-node-file.ts +76 -14
- package/src/speechflow-node-gemma.ts +169 -0
- package/src/speechflow-node-websocket.ts +52 -13
- package/src/speechflow-node.ts +43 -21
- package/src/speechflow.ts +144 -47
- package/dst/speechflow-util.js +0 -37
- package/src/speechflow-util.ts +0 -36
package/src/speechflow-node-device.ts
@@ -4,38 +4,102 @@
 ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
 
+/* standard dependencies */
 import Stream from "node:stream"
+
+/* external dependencies */
 import PortAudio from "@gpeng/naudiodon"
+
+/* internal dependencies */
 import SpeechFlowNode from "./speechflow-node"
-import SpeechFlowUtil from "./speechflow-util"
 
+/* SpeechFlow node for device access */
 export default class SpeechFlowNodeDevice extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "device"
+
+    /* internal state */
     private io: PortAudio.IoStreamRead | PortAudio.IoStreamWrite | PortAudio.IoStreamDuplex | null = null
+
+    /* construct node */
     constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
         super(id, opts, args)
+
+        /* declare node configuration parameters */
         this.configure({
             device: { type: "string", pos: 0, match: /^(.+?):(.+)$/ },
             mode: { type: "string", pos: 1, val: "rw", match: /^(?:r|w|rw)$/ }
         })
+
+        /* declare node input/output format */
+        if (this.params.mode === "rw") {
+            this.input = "audio"
+            this.output = "audio"
+        }
+        else if (this.params.mode === "r") {
+            this.input = "none"
+            this.output = "audio"
+        }
+        else if (this.params.mode === "w") {
+            this.input = "audio"
+            this.output = "none"
+        }
+    }
+
+    /* INTERNAL: utility function for finding audio device by pseudo-URL notation */
+    private audioDeviceFromURL (mode: "any" | "r" | "w" | "rw", url: string) {
+        /* parse URL */
+        const m = url.match(/^(.+?):(.+)$/)
+        if (m === null)
+            throw new Error(`invalid audio device URL "${url}"`)
+        const [ , type, name ] = m
+
+        /* determine audio API */
+        const apis = PortAudio.getHostAPIs()
+        const api = apis.HostAPIs.find((api) => api.type.toLowerCase() === type.toLowerCase())
+        if (!api)
+            throw new Error(`invalid audio API type "${type}"`)
+
+        /* determine device of audio API */
+        const devices = PortAudio.getDevices()
+        const device = devices.find((device) => {
+            return (
+                (  (mode === "r" && device.maxInputChannels > 0)
+                || (mode === "w" && device.maxOutputChannels > 0)
+                || (mode === "rw" && device.maxInputChannels > 0 && device.maxOutputChannels > 0)
+                || (mode === "any" && (device.maxInputChannels > 0 || device.maxOutputChannels > 0)))
+                && device.name.match(name)
+                && device.hostAPIName === api.name
+            )
+        })
+        if (!device)
+            throw new Error(`invalid audio device "${name}" (of audio API type "${type}")`)
+        return device
     }
+
+    /* open node */
     async open () {
         /* determine device */
-        const device =
+        const device = this.audioDeviceFromURL(this.params.mode, this.params.device)
 
         /* sanity check sample rate compatibility
            (we still do not resample in input/output for simplification reasons) */
         if (device.defaultSampleRate !== this.config.audioSampleRate)
-            throw new Error(`device
+            throw new Error(`audio device sample rate ${device.defaultSampleRate} is ` +
                 `incompatible with required sample rate ${this.config.audioSampleRate}`)
 
         /* establish device connection
            Notice: "naudion" actually implements Stream.{Readable,Writable,Duplex}, but
            declares just its sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
            so it is correct to cast it back to Stream.{Readable,Writable,Duplex} */
-
+        /* FIXME: the underlying PortAudio outputs verbose/debugging messages */
+        if (this.params.mode === "rw") {
+            /* input/output device */
+            if (device.maxInputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any input channels (required by read/write mode)`)
+            if (device.maxOutputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any output channels (required by read/write mode)`)
             this.log("info", `resolved "${this.params.device}" to duplex device "${device.id}"`)
-            this.input = "audio"
-            this.output = "audio"
             this.io = PortAudio.AudioIO({
                 inOptions: {
                     deviceId: device.id,
@@ -52,10 +116,11 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
             })
             this.stream = this.io as unknown as Stream.Duplex
         }
-        else if (
+        else if (this.params.mode === "r") {
+            /* input device */
+            if (device.maxInputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any input channels (required by read mode)`)
             this.log("info", `resolved "${this.params.device}" to input device "${device.id}"`)
-            this.input = "none"
-            this.output = "audio"
             this.io = PortAudio.AudioIO({
                 inOptions: {
                     deviceId: device.id,
@@ -66,10 +131,11 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
             })
             this.stream = this.io as unknown as Stream.Readable
         }
-        else if (
+        else if (this.params.mode === "w") {
+            /* output device */
+            if (device.maxOutputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any output channels (required by write mode)`)
             this.log("info", `resolved "${this.params.device}" to output device "${device.id}"`)
-            this.input = "audio"
-            this.output = "none"
             this.io = PortAudio.AudioIO({
                 outOptions: {
                     deviceId: device.id,
@@ -83,14 +149,22 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
         else
             throw new Error(`device "${device.id}" does not have any input or output channels`)
 
-        /* pass-through errors */
+        /* pass-through PortAudio errors */
        this.io.on("error", (err) => {
             this.emit("error", err)
         })
+
+        /* start PortAudio */
+        this.io.start()
     }
+
+    /* close node */
     async close () {
-
+        /* shutdown PortAudio */
+        if (this.io !== null) {
+            this.io.quit()
+            this.io = null
+        }
    }
 }
 
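
Aside: the new audioDeviceFromURL() helper above resolves a "<api-type>:<device-name>" pseudo-URL against the PortAudio host APIs and devices. A minimal standalone sketch (not part of the package; it only reuses the @gpeng/naudiodon calls already visible in the hunk above) of how one could list the pseudo-URLs such a lookup would accept:

```ts
/* sketch: enumerate "<api-type>:<device-name>" pseudo-URLs
   (assumes @gpeng/naudiodon is installed and PortAudio can be initialized) */
import PortAudio from "@gpeng/naudiodon"

const apis = PortAudio.getHostAPIs().HostAPIs
for (const device of PortAudio.getDevices()) {
    const api = apis.find((api) => api.name === device.hostAPIName)
    if (api !== undefined)
        console.log(`${api.type.toLowerCase()}:${device.name}`)
}
```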

package/src/speechflow-node-elevenlabs.ts
@@ -4,12 +4,15 @@
 ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
 
+/* standard dependencies */
 import Stream from "node:stream"
 import { EventEmitter } from "node:events"
 
+/* external dependencies */
 import * as ElevenLabs from "elevenlabs"
 import { getStreamAsBuffer } from "get-stream"
 
+/* internal dependencies */
 import SpeechFlowNode from "./speechflow-node"
 
 /*
@@ -28,19 +31,30 @@ const elevenlabsVoices = {
 */
 
 export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "elevenlabs"
+
+    /* internal state */
     private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
+
+    /* construct node */
     constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
         super(id, opts, args)
+
+        /* declare node configuration parameters */
         this.configure({
             key: { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
             voice: { type: "string", val: "Brian", pos: 0 },
             language: { type: "string", val: "de", pos: 1 }
         })
-
-
+
+        /* declare node input/output format */
         this.input = "text"
         this.output = "audio"
+    }
 
+    /* open node */
+    async open () {
         this.elevenlabs = new ElevenLabs.ElevenLabsClient({
             apiKey: this.params.key
         })
@@ -89,7 +103,10 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
             }
         })
     }
+
+    /* close node */
     async close () {
+        /* destroy stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null

package/src/speechflow-node-ffmpeg.ts (new file)
@@ -0,0 +1,122 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import FFmpeg from "@rse/ffmpeg"
+import { Converter as FFmpegStream } from "ffmpeg-stream"
+
+/* internal dependencies */
+import SpeechFlowNode from "./speechflow-node"
+
+/* SpeechFlow node for FFmpeg */
+export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "ffmpeg"
+
+    /* internal state */
+    private ffmpegBinary = FFmpeg.supported ? FFmpeg.binary : "ffmpeg"
+    private ffmpeg: FFmpegStream | null = null
+
+    /* construct node */
+    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            src: { type: "string", pos: 0, val: "pcm", match: /^(?:pcm|wav|mp3|opus)$/ },
+            dst: { type: "string", pos: 1, val: "wav", match: /^(?:pcm|wav|mp3|opus)$/ }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* sanity check situation */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination formats should not be the same")
+
+        /* instantiate FFmpeg sub-process */
+        this.ffmpeg = new FFmpegStream(this.ffmpegBinary)
+        const streamInput = this.ffmpeg.createInputStream({
+            /* FFmpeg input options */
+            "fflags": "nobuffer",
+            "flags": "low_delay",
+            "probesize": 32,
+            "analyzeduration": 0,
+            ...(this.params.src === "pcm" ? {
+                "f": "s16le",
+                "ar": this.config.audioSampleRate,
+                "ac": this.config.audioChannels
+            } : {}),
+            ...(this.params.src === "wav" ? {
+                "f": "wav"
+            } : {}),
+            ...(this.params.src === "mp3" ? {
+                "f": "mp3"
+            } : {}),
+            ...(this.params.src === "opus" ? {
+                "f": "opus"
+            } : {})
+        })
+        const streamOutput = this.ffmpeg.createOutputStream({
+            /* FFmpeg output options */
+            "flush_packets": 1,
+            ...(this.params.dst === "pcm" ? {
+                "c:a": "pcm_s16le",
+                "ar": this.config.audioSampleRate,
+                "ac": this.config.audioChannels,
+                "f": "s16le",
+            } : {}),
+            ...(this.params.dst === "wav" ? {
+                "f": "wav"
+            } : {}),
+            ...(this.params.dst === "mp3" ? {
+                "c:a": "libmp3lame",
+                "b:a": "192k",
+                "f": "mp3"
+            } : {}),
+            ...(this.params.dst === "opus" ? {
+                "acodec": "libopus",
+                "f": "opus"
+            } : {})
+        })
+        this.ffmpeg.run()
+
+        /* establish a duplex stream and connect it to FFmpeg */
+        this.stream = Stream.Duplex.from({
+            readable: streamOutput,
+            writable: streamInput
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* close duplex stream */
+        if (this.stream !== null) {
+            await new Promise<void>((resolve) => {
+                if (this.stream instanceof Stream.Duplex)
+                    this.stream.end(() => { resolve() })
+                else
+                    resolve()
+            })
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* shutdown FFmpeg */
+        if (this.ffmpeg !== null) {
+            this.ffmpeg.kill()
+            this.ffmpeg = null
+        }
+    }
+}
+
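
For orientation, the ffmpeg-stream Converter pattern used by this new node (createInputStream/createOutputStream/run) can be exercised standalone. A minimal sketch, not part of the package; the 48 kHz mono s16le parameters are illustrative assumptions, not SpeechFlow's configured values:

```ts
/* sketch: pipe raw PCM (s16le, 48 kHz, mono) from stdin through ffmpeg and emit WAV on stdout */
import { Converter } from "ffmpeg-stream"

const converter = new Converter()  /* assumes an "ffmpeg" binary in PATH */
const input  = converter.createInputStream({ f: "s16le", ar: 48000, ac: 1 })
const output = converter.createOutputStream({ f: "wav" })
process.stdin.pipe(input)
output.pipe(process.stdout)
converter.run().catch((err) => { console.error(err); process.exit(1) })
```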

package/src/speechflow-node-file.ts
@@ -4,41 +4,103 @@
 ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
 
+/* standard dependencies */
 import fs from "node:fs"
+import Stream from "node:stream"
+
+/* internal dependencies */
 import SpeechFlowNode from "./speechflow-node"
 
-
+/* SpeechFlow node for file access */
+export default class SpeechFlowNodeFile extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "file"
+
+    /* construct node */
     constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
         super(id, opts, args)
+
+        /* declare node configuration parameters */
         this.configure({
             path: { type: "string", pos: 0 },
             mode: { type: "string", pos: 1, val: "r", match: /^(?:r|w|rw)$/ },
             type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ }
         })
+
+        /* declare node input/output format */
+        if (this.params.mode === "rw") {
+            this.input = this.params.type
+            this.output = this.params.type
+        }
+        else if (this.params.mode === "r") {
+            this.input = "none"
+            this.output = this.params.type
+        }
+        else if (this.params.mode === "w") {
+            this.input = this.params.type
+            this.output = "none"
+        }
     }
+
+    /* open node */
     async open () {
-
-
-        if (this.params.path === "-")
+        const encoding = this.params.type === "text" ? this.config.textEncoding : "binary"
+        if (this.params.mode === "rw") {
+            if (this.params.path === "-") {
+                /* standard I/O */
+                process.stdin.setEncoding(encoding)
+                process.stdout.setEncoding(encoding)
+                this.stream = Stream.Duplex.from({
+                    readable: process.stdin,
+                    writable: process.stdout
+                })
+            }
+            else {
+                /* file I/O */
+                this.stream = Stream.Duplex.from({
+                    readable: fs.createReadStream(this.params.path, { encoding }),
+                    writable: fs.createWriteStream(this.params.path, { encoding })
+                })
+            }
+        }
+        else if (this.params.mode === "r") {
+            if (this.params.path === "-") {
+                /* standard I/O */
+                process.stdin.setEncoding(encoding)
                 this.stream = process.stdin
-
-
-
+            }
+            else {
+                /* file I/O */
+                this.stream = fs.createReadStream(this.params.path, { encoding })
+            }
         }
         else if (this.params.mode === "w") {
-
-
+            if (this.params.path === "-") {
+                /* standard I/O */
+                process.stdout.setEncoding(encoding)
                 this.stream = process.stdout
-
-
-
+            }
+            else {
+                /* file I/O */
+                this.stream = fs.createWriteStream(this.params.path, { encoding })
+            }
         }
         else
             throw new Error(`invalid file mode "${this.params.mode}"`)
     }
+
+    /* close node */
     async close () {
-
-
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await new Promise<void>((resolve) => {
+                if (this.stream instanceof Stream.Writable || this.stream instanceof Stream.Duplex)
+                    this.stream.end(() => { resolve() })
+                else
+                    resolve()
+            })
+            if (this.params.path !== "-")
+                this.stream.destroy()
             this.stream = null
         }
     }
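
The new "rw" mode above leans on Stream.Duplex.from({ readable, writable }) (Node.js >= 16.8) to present a separate read stream and write stream as one duplex. A minimal standalone sketch, not part of the package; the file names are hypothetical:

```ts
/* sketch: wrap a read stream and a write stream into one duplex, then copy data through it */
import fs from "node:fs"
import Stream from "node:stream"

const duplex = Stream.Duplex.from({
    readable: fs.createReadStream("in.txt", { encoding: "utf8" }),
    writable: fs.createWriteStream("out.txt", { encoding: "utf8" })
})
duplex.on("data", (chunk) => { duplex.write(chunk) })  /* reads come from in.txt  */
duplex.on("end",  () => { duplex.end() })              /* writes land in out.txt */
```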

package/src/speechflow-node-gemma.ts (new file)
@@ -0,0 +1,169 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+import { EventEmitter } from "node:events"
+
+/* external dependencies */
+import { Ollama } from "ollama"
+
+/* internal dependencies */
+import SpeechFlowNode from "./speechflow-node"
+
+/* internal utility types */
+type ConfigEntry = { systemPrompt: string, chat: Array<{ role: string, content: string }> }
+type Config = { [ key: string ]: ConfigEntry }
+
+/* SpeechFlow node for Gemma/Ollama text-to-text translation */
+export default class SpeechFlowNodeGemma extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "gemma"
+
+    /* internal state */
+    private ollama: Ollama | null = null
+
+    /* internal LLM setup */
+    private setup: Config = {
+        /* English (EN) to German (DE) translation */
+        "en-de": {
+            systemPrompt:
+                "You are a translator.\n" +
+                "Output only the requested text.\n" +
+                "Do not use markdown.\n" +
+                "Do not chat.\n" +
+                "Do not show any explanations.\n" +
+                "Do not show any introduction.\n" +
+                "Do not show any preamble.\n" +
+                "Do not show any prolog.\n" +
+                "Do not show any epilog.\n" +
+                "Get to the point.\n" +
+                "Directly translate text from Enlish (EN) to German (DE) language.\n",
+            chat: [
+                { role: "user", content: "I love my wife." },
+                { role: "system", content: "Ich liebe meine Frau." },
+                { role: "user", content: "The weather is wonderful." },
+                { role: "system", content: "Das Wetter ist wunderschön." },
+                { role: "user", content: "The live is awesome." },
+                { role: "system", content: "Das Leben ist einfach großartig." }
+            ]
+        },
+
+        /* German (DE) to English (EN) translation */
+        "de-en": {
+            systemPrompt:
+                "You are a translator.\n" +
+                "Output only the requested text.\n" +
+                "Do not use markdown.\n" +
+                "Do not chat.\n" +
+                "Do not show any explanations. \n" +
+                "Do not show any introduction.\n" +
+                "Do not show any preamble. \n" +
+                "Do not show any prolog. \n" +
+                "Do not show any epilog. \n" +
+                "Get to the point.\n" +
+                "Directly translate text from German (DE) to English (EN) language.\n",
+            chat: [
+                { role: "user", content: "Ich liebe meine Frau." },
+                { role: "system", content: "I love my wife." },
+                { role: "user", content: "Das Wetter ist wunderschön." },
+                { role: "system", content: "The weather is wonderful." },
+                { role: "user", content: "Das Leben ist einfach großartig." },
+                { role: "system", content: "The live is awesome." }
+            ]
+        }
+    }
+
+    /* construct node */
+    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?:\d+$/ },
+            src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
+            dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
+        })
+
+        /* sanity check situation */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination languages cannot be the same")
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "text"
+    }
+
+    /* open node */
+    async open () {
+        /* instantiate Ollama API */
+        this.ollama = new Ollama({ host: this.params.api })
+
+        /* provide text-to-text translation */
+        const translate = async (text: string) => {
+            const key = `${this.params.src}-${this.params.dst}`
+            const cfg = this.setup[key]
+            const response = await this.ollama!.chat({
+                model: "gemma3:4b-it-q4_K_M",
+                messages: [
+                    { role: "system", content: cfg.systemPrompt },
+                    ...cfg.chat,
+                    { role: "user", content: text }
+                ],
+                keep_alive: "10m",
+                options: {
+                    repeat_penalty: 1.1,
+                    temperature: 0.7,
+                    seed: 1,
+                    top_k: 10,
+                    top_p: 0.5
+                }
+            })
+            return response.message.content
+        }
+
+        /* establish a duplex stream and connect it to Ollama */
+        const queue = new EventEmitter()
+        this.stream = new Stream.Duplex({
+            write (chunk: Buffer, encoding, callback) {
+                const data = chunk.toString()
+                if (data === "") {
+                    queue.emit("result", "")
+                    callback()
+                }
+                else {
+                    translate(data).then((result) => {
+                        queue.emit("result", result)
+                        callback()
+                    }).catch((err) => {
+                        callback(err)
+                    })
+                }
+            },
+            read (size) {
+                queue.once("result", (result: string) => {
+                    this.push(result)
+                })
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* shutdown Ollama */
+        if (this.ollama !== null) {
+            this.ollama.abort()
+            this.ollama = null
+        }
+    }
+}
+
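
The new "gemma" node's translation boils down to a single Ollama chat call. A standalone sketch of that call, not part of the package; it assumes a local Ollama server on the default port with the gemma3:4b-it-q4_K_M model already pulled, and uses a condensed system prompt for brevity:

```ts
/* sketch: ask a local Ollama instance for a one-shot DE -> EN translation */
import { Ollama } from "ollama"

const ollama = new Ollama({ host: "http://127.0.0.1:11434" })
ollama.chat({
    model: "gemma3:4b-it-q4_K_M",
    messages: [
        { role: "system", content: "You are a translator. Directly translate text from German (DE) to English (EN)." },
        { role: "user",   content: "Das Wetter ist wunderschön." }
    ]
}).then((response) => {
    console.log(response.message.content)  /* expected: an English translation */
})
```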