npm - speechflow - Versions diffs - 1.4.3 → 1.4.5 - Mend

speechflow 1.4.3 → 1.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,21 @@
 ChangeLog
 =========
+1.4.5 (2025-08-07)
+------------------
+- IMPROVEMENT: better CLI option handling
+- IMPROVEMENT: better optical appearance of dashboard
+- BUGFIX: do not complain if no .env file is found
+- BUGFIX: avoid read-timeouts in "deepgram" node
+- CLEANUP: output stack traces only for "info" and "debug" verbosity levels
+1.4.4 (2025-08-07)
+------------------
+- BUGFIX: do not ignore "dst" files in NPM distribution
+- UPGRADE: upgrade NPM dependencies
 1.4.3 (2025-08-06)
 ------------------

package/README.md CHANGED Viewed

@@ -53,6 +53,59 @@ derived from the exported `SpeechFlowNode` class of the `speechflow` package.
 **SpeechFlow** is written in TypeScript and
 ships as an installable package for the Node Package Manager (NPM).
+Impression
+----------
+**SpeechFlow** is a command-line interface (CLI) based tool, so its
+CLI appearance does not lend itself to an exciting screenshot, of course.
+Instead, here is a sample of a fictitious training session which is held
+in German and translated to English in real time.
+First, the configuration used was a straightforward linear pipeline in file `sample.conf`:
+```txt
+device(device: "coreaudio:Elgato Wave:3", mode: "r") |
+meter(interval: 50, dashboard: "meter1") |
+deepgram(language: "de", model: "nova-2", interim: true) |
+trace(type: "text", dashboard: "text1") |
+filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
+sentence() |
+trace(type: "text", dashboard: "text2") |
+deepl(src: "de", dst: "en") |
+trace(type: "text", dashboard: "text3") |
+elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
+meter(interval: 50, dashboard: "meter2") |
+device(device: "coreaudio:USBAudio2.0", mode: "w")
+```
+Second, the corresponding **SpeechFlow** command was:
+```sh
+$ speechflow -v info -c sample.conf \
+  -d audio:meter1:DE,text:text1:DE-Interim,text:text2:DE-Final,text:text3:EN,audio:meter2:EN
+```
+Finally, the resulting dashboard under URL `http://127.0.0.1:8484/` was:
+![dashboard](etc/speechflow.png)
+On the left you can see the volume meter of the microphone (`device`),
+followed by the German result of the speech-to-text conversion
+(`deepgram`), followed by the still German results of the text-to-text
+sentence splitting/aggregation (`sentence`), followed by the English
+results of the text-to-text translation (`deepl`) and then finally on
+the right you can see the volume meter of the text-to-speech conversion
+(`elevenlabs`).
+The entire **SpeechFlow** processing pipeline runs in real-time and
+the latency between input and output audio is about 2-3 seconds, very
+similar to the latency that human live translators usually cause. The
+latency primarily comes from the speech-to-text part in the pipeline,
+as the end of each sentence has to be awaited -- especially in the German
+language where the verb can come very late in a sentence. So, the
+latency is primarily caused not by any technical aspects, but by the
+nature of live translation.
 Installation
 ------------

package/etc/speechflow.png ADDED Viewed

Binary file

package/etc/speechflow.yaml CHANGED Viewed

@@ -79,31 +79,30 @@ studio-transcription: |
 studio-translation: |
     device(device: "coreaudio:Elgato Wave:3", mode: "r") | {
         gender() | {
-            meter(interval: 250) |
+            meter(interval: 250, dashboard: "meter1") |
                 wav(mode: "encode") |
                     file(path: "program-de.wav", mode: "w", type: "audio"),
-            deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) | {
+            deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY, interim: true) | {
+                trace(name: "trace1", type: "text", dashboard: "text1"),
                 subtitle(format: "vtt", words: true) |
                     file(path: "program-de.vtt", mode: "w", type: "text"),
                 sentence() | {
-                    format(width: 80) |
-                        file(path: "program-de.txt", mode: "w", type: "text"),
+                    trace(name: "trace2", type: "text", notify: true, dashboard: "text2") |
+                        format(width: 80) |
+                            file(path: "program-de.txt", mode: "w", type: "text"),
                     deepl(src: "de", dst: "en", key: env.SPEECHFLOW_DEEPL_KEY) | {
-                        trace(name: "text", type: "text") | {
+                        trace(name: "trace3", type: "text", dashboard: "text3") | {
                             format(width: 80) |
                                 file(path: "program-en.txt", mode: "w", type: "text"),
                             subtitle(format: "vtt", words: false) |
                                 file(path: "program-en.vtt", mode: "w", type: "text"),
-                            mqtt(url: "mqtt://10.1.0.10:1883",
-                                username: env.SPEECHFLOW_MQTT_USER,
-                                password: env.SPEECHFLOW_MQTT_PASS,
-                                topicWrite: "stream/studio/sender"),
                             {
                                 filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
                                     elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
                                 filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
                                     elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
                             } | {
+                                meter(interval: 250, dashboard: "meter2"),
                                 wav(mode: "encode") |
                                     file(path: "program-en.wav", mode: "w", type: "audio"),
                                 device(device: "coreaudio:USBAudio2.0", mode: "w")
@@ -115,3 +114,20 @@ studio-translation: |
         }
     }
+#   Test-drive for development
+test: |
+    device(device: "coreaudio:Elgato Wave:3", mode: "r") |
+        meter(interval: 50, dashboard: "meter1") |
+            deepgram(language: "de", model: "nova-2", key: env.SPEECHFLOW_DEEPGRAM_KEY, interim: true) |
+                trace(type: "text", dashboard: "text1") | {
+                    subtitle(mode: "render", addr: "127.0.0.1", port: 8585),
+                    filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
+                        sentence() |
+                            trace(type: "text", dashboard: "text2") |
+                                deepl(src: "de", dst: "en", key: env.SPEECHFLOW_DEEPL_KEY) |
+                                    trace(type: "text", dashboard: "text3") |
+                                        elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
+                                            meter(interval: 50, dashboard: "meter2") |
+                                                device(device: "coreaudio:USBAudio2.0", mode: "w")
+                }

package/package.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "name":             "speechflow",
-    "version":          "1.4.3",
-    "x-stdver":         "1.4.3-GA",
-    "x-release":        "2025-08-06",
+    "version":          "1.4.5",
+    "x-stdver":         "1.4.5-GA",
+    "x-release":        "2025-08-07",
     "homepage":         "https://github.com/rse/speechflow",
     "description":      "Speech Processing Flow Graph",
     "license":          "GPL-3.0-only",

package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode { /* type declarations of the FFmpeg node class */
+    static name: string; /* node name */
+    private ffmpegBinary; /* NOTE(review): presumably the FFmpeg executable to run -- type is not declared here */
+    private ffmpeg; /* NOTE(review): presumably the FFmpeg converter handle -- type is not declared here */
+    constructor(id: string, cfg: {
+        [id: string]: any;
+    }, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    open(): Promise<void>; /* asynchronous open of the node */
+    close(): Promise<void>; /* asynchronous close of the node */
+}

package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js ADDED Viewed

@@ -0,0 +1,153 @@
+"use strict";
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { /* compiler-emitted helper: expose m[k] on o under key k2 (k2 defaults to k) */
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { /* fall back to an enumerable getter that reads m[k] on access */
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { /* variant without Object.create: plain value copy */
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { /* compiler-emitted helper: store v as the "default" property of o */
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { /* compiler-emitted helper backing the namespace-style require() of "./speechflow-utils" below */
+    var ownKeys = function(o) { /* replaces itself on first call with Object.getOwnPropertyNames or the manual for-in fallback */
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; /* modules flagged __esModule are returned unchanged */
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); /* bind every own key except "default" */
+        __setModuleDefault(result, mod); /* the module itself becomes result.default */
+        return result;
+    };
+})();
+var __importDefault = (this && this.__importDefault) || function (mod) { /* compiler-emitted helper: modules not flagged __esModule are wrapped as { default: mod } */
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+/*  standard dependencies  */
+const node_stream_1 = __importDefault(require("node:stream"));
+/*  external dependencies  */
+const ffmpeg_1 = __importDefault(require("@rse/ffmpeg"));
+const ffmpeg_stream_1 = require("ffmpeg-stream");
+/*  internal dependencies  */
+const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+const utils = __importStar(require("./speechflow-utils"));
+/*  SpeechFlow node for FFmpeg: converts audio between the pcm, wav, mp3 and opus formats through an FFmpeg sub-process  */
+class SpeechFlowNodeFFmpeg extends speechflow_node_1.default {
+    /*  declare official node name  */
+    static name = "ffmpeg";
+    /*  internal state: FFmpeg executable (binary reported by "@rse/ffmpeg" if flagged as supported, else plain "ffmpeg") and the converter instance (null while none exists)  */
+    ffmpegBinary = ffmpeg_1.default.supported ? ffmpeg_1.default.binary : "ffmpeg";
+    ffmpeg = null;
+    /*  construct node  */
+    constructor(id, cfg, opts, args) {
+        super(id, cfg, opts, args);
+        /*  declare node configuration parameters: source and destination format, each restricted to pcm, wav, mp3 or opus  */
+        this.configure({
+            src: { type: "string", pos: 0, val: "pcm", match: /^(?:pcm|wav|mp3|opus)$/ },
+            dst: { type: "string", pos: 1, val: "wav", match: /^(?:pcm|wav|mp3|opus)$/ }
+        });
+        /*  declare node input/output format: audio in, audio out  */
+        this.input = "audio";
+        this.output = "audio";
+    }
+    /*  open node: create the FFmpeg converter and expose it as this.stream  */
+    async open() {
+        /*  sanity check situation: converting a format into itself is rejected  */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination formats should not be the same");
+        /*  instantiate FFmpeg sub-process with one input and one output stream  */
+        this.ffmpeg = new ffmpeg_stream_1.Converter(this.ffmpegBinary);
+        const streamInput = this.ffmpeg.createInputStream({
+            /*  FFmpeg input options: generic flags first, then the format selection for the configured source (raw PCM additionally gets sample rate and channel count from the node config)  */
+            "fflags": "nobuffer",
+            "flags": "low_delay",
+            "probesize": 32,
+            "analyzeduration": 0,
+            ...(this.params.src === "pcm" ? {
+                "f": "s16le",
+                "ar": this.config.audioSampleRate,
+                "ac": this.config.audioChannels
+            } : {}),
+            ...(this.params.src === "wav" ? {
+                "f": "wav"
+            } : {}),
+            ...(this.params.src === "mp3" ? {
+                "f": "mp3"
+            } : {}),
+            ...(this.params.src === "opus" ? {
+                "f": "opus"
+            } : {})
+        });
+        const streamOutput = this.ffmpeg.createOutputStream({
+            /*  FFmpeg output options: packet flushing first, then codec and container selection for the configured destination  */
+            "flush_packets": 1,
+            ...(this.params.dst === "pcm" ? {
+                "c:a": "pcm_s16le",
+                "ar": this.config.audioSampleRate,
+                "ac": this.config.audioChannels,
+                "f": "s16le",
+            } : {}),
+            ...(this.params.dst === "wav" ? {
+                "f": "wav"
+            } : {}),
+            ...(this.params.dst === "mp3" ? {
+                "c:a": "libmp3lame",
+                "b:a": "192k",
+                "f": "mp3"
+            } : {}),
+            ...(this.params.dst === "opus" ? {
+                "acodec": "libopus",
+                "f": "opus"
+            } : {})
+        });
+        this.ffmpeg.run(); /*  NOTE(review): the return value is neither awaited nor checked here -- confirm whether run() can fail asynchronously  */
+        /*  establish a duplex stream and connect it to FFmpeg: writes go to the FFmpeg input stream, reads come from the FFmpeg output stream  */
+        this.stream = node_stream_1.default.Duplex.from({
+            writable: streamInput,
+            readable: streamOutput
+        });
+        /*  wrap streams with conversions for chunk vs plain audio (wrapper1 in front of, wrapper2 behind the FFmpeg duplex)  */
+        const wrapper1 = utils.createTransformStreamForWritableSide();
+        const wrapper2 = utils.createTransformStreamForReadableSide("audio", () => this.timeZero);
+        this.stream = node_stream_1.default.compose(wrapper1, this.stream, wrapper2);
+    }
+    /*  close node: tear down the stream first, then the FFmpeg converter  */
+    async close() {
+        /*  close duplex stream: end it and wait for the end callback (no waiting if it is not a Duplex), then destroy and forget it  */
+        if (this.stream !== null) {
+            await new Promise((resolve) => {
+                if (this.stream instanceof node_stream_1.default.Duplex)
+                    this.stream.end(() => { resolve(); });
+                else
+                    resolve();
+            });
+            this.stream.destroy();
+            this.stream = null;
+        }
+        /*  shutdown FFmpeg: kill the converter and forget it  */
+        if (this.ffmpeg !== null) {
+            this.ffmpeg.kill();
+            this.ffmpeg = null;
+        }
+    }
+}
+exports.default = SpeechFlowNodeFFmpeg;
+//# sourceMappingURL=speechflow-node-a2a-ffmpeg.js.map

package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"speechflow-node-a2a-ffmpeg.js","sourceRoot":"","sources":["../src/speechflow-node-a2a-ffmpeg.ts"],"names":[],"mappings":";AAAA;;;;EAIE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEF,6BAA6B;AAC7B,8DAAuD;AAEvD,6BAA6B;AAC7B,yDAAuD;AACvD,iDAAyD;AAEzD,6BAA6B;AAC7B,wEAA6D;AAC7D,0DAA8D;AAE9D,kCAAkC;AAClC,MAAqB,oBAAqB,SAAQ,yBAAc;IAC5D,kCAAkC;IAC3B,MAAM,CAAC,IAAI,GAAG,QAAQ,CAAA;IAE7B,sBAAsB;IACd,YAAY,GAAG,gBAAM,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAM,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAA;IAC1D,MAAM,GAAwB,IAAI,CAAA;IAE1C,sBAAsB;IACtB,YAAa,EAAU,EAAE,GAA4B,EAAE,IAA6B,EAAE,IAAW;QAC7F,KAAK,CAAC,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QAE1B,6CAA6C;QAC7C,IAAI,CAAC,SAAS,CAAC;YACX,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE;YAC5E,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE;SAC/E,CAAC,CAAA;QAEF,wCAAwC;QACxC,IAAI,CAAC,KAAK,GAAI,OAAO,CAAA;QACrB,IAAI,CAAC,MAAM,GAAG,OAAO,CAAA;IACzB,CAAC;IAED,iBAAiB;IACjB,KAAK,CAAC,IAAI;QACN,8BAA8B;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,IAAI,CAAC,MAAM,CAAC,GAAG;YACnC,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAA;QAE5E,sCAAsC;QACtC,IAAI,CAAC,MAAM,GAAG,IAAI,yBAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;QACjD,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC;YAC9C,4BAA4B;YAC5B,QAAQ,EAAW,UAAU;YAC7B,OAAO,EAAY,WAAW;YAC9B,WAAW,EAAQ,EAAE;YACrB,iBAAiB,EAAE,CAAC;YACpB,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC5B,GAAG,EAAY,OAAO;gBACtB,IAAI,EAAW,IAAI,CAAC,MAAM,CAAC,eAAe;gBAC1C,IAAI,EAAW,IAAI,CAAC,MAAM,CAAC,aAAa;aAC3C,CAAC,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC5B,GAAG,EAAY,KAAK;aACvB,CAAC,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC5B,GAAG,EAAY,KAAK;aACvB,CAAC,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,MAAM,CAAC,CAAC,CAAC;gBAC7B,GAAG,EAAY,MAAM;aACxB,CAAC,CAAC,CAAC,EAAE,CAAC;SACV,CAAC,CAAA;QACF,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,C
AAC;YAChD,6BAA6B;YAC7B,eAAe,EAAI,CAAC;YACpB,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC5B,KAAK,EAAU,WAAW;gBAC1B,IAAI,EAAW,IAAI,CAAC,MAAM,CAAC,eAAe;gBAC1C,IAAI,EAAW,IAAI,CAAC,MAAM,CAAC,aAAa;gBACxC,GAAG,EAAY,OAAO;aACzB,CAAC,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC5B,GAAG,EAAY,KAAK;aACvB,CAAC,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC5B,KAAK,EAAU,YAAY;gBAC3B,KAAK,EAAU,MAAM;gBACrB,GAAG,EAAY,KAAK;aACvB,CAAC,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,KAAK,MAAM,CAAC,CAAC,CAAC;gBAC7B,QAAQ,EAAO,SAAS;gBACxB,GAAG,EAAY,MAAM;aACxB,CAAC,CAAC,CAAC,EAAE,CAAC;SACV,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,CAAA;QAEjB,0DAA0D;QAC1D,IAAI,CAAC,MAAM,GAAG,qBAAM,CAAC,MAAM,CAAC,IAAI,CAAC;YAC7B,QAAQ,EAAE,WAAW;YACrB,QAAQ,EAAE,YAAY;SACzB,CAAC,CAAA;QAEF,8DAA8D;QAC9D,MAAM,QAAQ,GAAG,KAAK,CAAC,oCAAoC,EAAE,CAAA;QAC7D,MAAM,QAAQ,GAAG,KAAK,CAAC,oCAAoC,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACzF,IAAI,CAAC,MAAM,GAAG,qBAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAA;IACjE,CAAC;IAED,kBAAkB;IAClB,KAAK,CAAC,KAAK;QACP,2BAA2B;QAC3B,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;YACvB,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;gBAChC,IAAI,IAAI,CAAC,MAAM,YAAY,qBAAM,CAAC,MAAM;oBACpC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,OAAO,EAAE,CAAA,CAAC,CAAC,CAAC,CAAA;;oBAEpC,OAAO,EAAE,CAAA;YACjB,CAAC,CAAC,CAAA;YACF,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAA;YACrB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAA;QACtB,CAAC;QAED,uBAAuB;QACvB,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAA;YAClB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAA;QACtB,CAAC;IACL,CAAC;;AA3GL,uCA4GC"}

package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeGender extends SpeechFlowNode { /* type declarations only -- NOTE(review): the roles of the private fields are not visible from this declaration */
+    static name: string;
+    private static speexInitialized;
+    private classifier;
+    private queue;
+    private queueRecv;
+    private queueAC;
+    private queueSend;
+    private shutdown;
+    private workingOffTimer;
+    private progressInterval;
+    constructor(id: string, cfg: {
+        [id: string]: any;
+    }, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    open(): Promise<void>;
+    close(): Promise<void>;
+}