speechflow 0.9.7 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,9 +14,13 @@ SpeechFlow
  About
  -----

- **SpeechFlow** is a command-line interface based tool for establishing a
- directed data flow graph of audio and text processing nodes. This way,
- it allows to perform various speech processing tasks in a flexible way.
+ **SpeechFlow** is a command-line interface based tool for establishing
+ a directed data flow graph of audio and text processing nodes. This
+ way, it allows one to perform various speech processing tasks in a very
+ flexible and configurable way. The typical supported tasks are capturing
+ audio, generating narrations of text (aka text-to-speech), generating
+ transcriptions or subtitles for audio (aka speech-to-text), and generating
+ translations for audio (aka speech-to-speech).

  **SpeechFlow** comes with built-in graph nodes for
  local file I/O,
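The graph expressions in the examples further below chain nodes with `|`, and the compiled node code later in this diff shows each node exposing a Node.js Duplex stream, so an edge in such a graph behaves essentially like a stream pipe. A minimal sketch of that idea using plain Node.js streams only (the names are purely illustrative and none of SpeechFlow's actual node classes are involved):

```
import { Transform, pipeline } from "node:stream"

/* illustrative "node": upper-cases incoming text chunks */
const upperCase = new Transform({
    transform(chunk, _encoding, callback) {
        callback(null, chunk.toString().toUpperCase())
    }
})

/* wire stdin -> upperCase -> stdout, roughly what a graph "a | b | c" expresses */
pipeline(process.stdin, upperCase, process.stdout, (err) => {
    if (err)
        console.error("pipeline failed:", err)
})
```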
@@ -26,8 +30,8 @@ remote MQTT network I/O,
  cloud-based [Deepgram](https://deepgram.com) speech-to-text conversion,
  cloud-based [ElevenLabs](https://elevenlabs.io/) text-to-speech conversion,
  cloud-based [DeepL](https://deepl.com) text-to-text translation,
- local [Gemma/Ollama](https://ollama.com/library/gemma3) text-to-text translation,
- local [Gemma/Ollama](https://ollama.com/library/gemma3) text-to-text spelling correction,
+ cloud-based [OpenAI/GPT](https://openai.com) text-to-text translation (or spelling correction),
+ local [Ollama/Gemma](https://ollama.com) text-to-text translation (or spelling correction),
  local [OPUS/ONNX](https://github.com/Helsinki-NLP/Opus-MT) text-to-text translation,
  local [FFmpeg](https://ffmpeg.org/) speech-to-speech encoding,
  local WAV speech-to-speech encoding,
@@ -67,8 +71,7 @@ Processing Graph Examples
  -------------------------

  The following are examples of **SpeechFlow** processing graphs.
- They can also be found in the [sample.yaml](./sample.yaml) file
- for easy consumption with `speechflow -c <id>@sample.yaml>`.
+ They can also be found in the sample [speechflow.yaml](./etc/speechflow.yaml) file.

  - **Capturing**: Capture audio from microphone device into WAV audio file:

@@ -89,7 +92,7 @@ for easy consumption with `speechflow -c <id>@sample.yaml>`.
  }
  ```

- - **Narration**: Generate text file with German narration of MP3 audio file:
+ - **Transcription**: Generate text file with German transcription of MP3 audio file:

  ```
  file(path: argv.0, mode: "r", type: "audio") |
@@ -109,6 +112,15 @@ for easy consumption with `speechflow -c <id>@sample.yaml>`.
  file(path: argv.1, mode: "w", type: "text")
  ```

+ - **Speaking**: Generate audio file with English voice for a text file:
+
+ ```
+ file(path: argv.0, mode: "r", type: "text") |
+ kokoro(language: "en") |
+ wav(mode: "encode") |
+ file(path: argv.1, mode: "w", type: "audio")
+ ```
+
  - **Ad-Hoc Translation**: Ad-Hoc text translation from German to English
  via stdin/stdout:
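The Speaking example above pipes the synthesized audio through `wav(mode: "encode")` before writing it to a file. SpeechFlow's actual wav node is not part of this diff; the following is only a rough sketch of the kind of float-to-PCM conversion such an encode step performs, using the `wavefile` dependency that appears in the compiled code further below (the mono channel and 24 kHz sample rate are assumptions, not the node's real defaults):

```
import * as wavefile from "wavefile"

/* sketch: wrap Float32 samples (as a TTS engine emits them) into a 16-bit PCM WAV buffer */
function float32ToWav(samples: Float32Array, sampleRate = 24000): Buffer {
    const wav = new wavefile.WaveFile()
    wav.fromScratch(1, sampleRate, "32f", samples)  /* 1 channel, 32-bit float input  */
    wav.toBitDepth("16")                            /* convert to 16-bit integer PCM  */
    return Buffer.from(wav.toBuffer())              /* complete RIFF/WAV file image   */
}
```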
 
@@ -167,8 +179,9 @@ First a short overview of the available processing nodes:
  **deepgram**.
  - Text-to-Text nodes:
  **deepl**,
- **gemma**,
- **opus**,
+ **openai**,
+ **ollama**,
+ **transformers**,
  **subtitle**,
  **format**.
  - Text-to-Audio nodes:
@@ -306,10 +319,10 @@ First a short overview of the available processing nodes:
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |

- - Node: **gemma**<br/>
- Purpose: **Google Gemma Text-to-Text translation and spelling correction**<br/>
- Example: `gemma(src: "de", dst: "en")`<br/>
- Notice; this node requires the Ollama API!
+ - Node: **openai**<br/>
+ Purpose: **OpenAI/GPT Text-to-Text translation and spelling correction**<br/>
+ Example: `openai(src: "de", dst: "en")`<br/>
+ Notice: this node requires an OpenAI API key!

  | Port | Payload |
  | ------- | ----------- |
@@ -318,13 +331,32 @@ First a short overview of the available processing nodes:

  | Parameter | Position | Default | Requirement |
  | ------------ | --------- | -------- | ------------------ |
- | **url** | *none* | "http://127.0.0.1:11434" | `/^https?:\/\/.+?:\d+$/` |
+ | **api** | *none* | "https://api.openai.com" | `/^https?:\/\/.+?:\d+$/` |
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
+ | **key** | *none* | env.SPEECHFLOW\_KEY\_OPENAI | *none* |
+ | **model** | *none* | "gpt-4o-mini" | *none* |

- - Node: **opus**<br/>
- Purpose: **OPUS Text-to-Text translation**<br/>
- Example: `deepl(src: "de", dst: "en")`<br/>
+ - Node: **ollama**<br/>
+ Purpose: **Ollama/Gemma Text-to-Text translation and spelling correction**<br/>
+ Example: `ollama(src: "de", dst: "en")`<br/>
+ Notice: this node requires the Ollama API!
+
+ | Port | Payload |
+ | ------- | ----------- |
+ | input | text |
+ | output | text |
+
+ | Parameter | Position | Default | Requirement |
+ | ------------ | --------- | -------- | ------------------ |
+ | **api** | *none* | "http://127.0.0.1:11434" | `/^https?:\/\/.+?:\d+$/` |
+ | **model** | *none* | "gemma3:4b-it-q4_K_M" | *none* |
+ | **src** | 0 | "de" | `/^(?:de\|en)$/` |
+ | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
+
+ - Node: **transformers**<br/>
+ Purpose: **Transformers Text-to-Text translation**<br/>
+ Example: `transformers(src: "de", dst: "en")`<br/>

  | Port | Payload |
  | ------- | ----------- |
@@ -333,6 +365,7 @@ First a short overview of the available processing nodes:

  | Parameter | Position | Default | Requirement |
  | ------------ | --------- | -------- | ---------------- |
+ | **model** | *none* | "OPUS" | `/^(?:OPUS|SmolLM3)$/` |
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
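The openai and ollama nodes documented above are configured purely through an API endpoint, a model name, and source/target languages. As a rough illustration of what the ollama defaults (`api: "http://127.0.0.1:11434"`, `model: "gemma3:4b-it-q4_K_M"`) imply, a translation request against the public Ollama `/api/generate` endpoint could look roughly like this; the prompt wording is an assumption, not SpeechFlow's actual implementation:

```
async function translateViaOllama(text: string, src = "de", dst = "en"): Promise<string> {
    /* call the standard Ollama REST API on its default port */
    const response = await fetch("http://127.0.0.1:11434/api/generate", {
        method:  "POST",
        headers: { "Content-Type": "application/json" },
        body:    JSON.stringify({
            model:  "gemma3:4b-it-q4_K_M",
            prompt: `Translate the following text from ${src} to ${dst}:\n\n${text}`,
            stream: false
        })
    })
    const result = await response.json() as { response: string }
    return result.response.trim()
}
```

The openai node presumably issues the analogous request against `https://api.openai.com`, authenticating with the key taken from `env.SPEECHFLOW_KEY_OPENAI`, as its parameter table indicates.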
 
@@ -380,6 +413,22 @@ First a short overview of the available processing nodes:
  | **voice** | 0 | "Brian" | *none* |
  | **language** | 1 | "de" | *none* |

+ - Node: **kokoro**<br/>
+ Purpose: **Kokoro Text-to-Speech conversion**<br/>
+ Example: `kokoro(language: "en")`<br/>
+ Notice: this currently supports the English language only!
+
+ | Port | Payload |
+ | ------- | ----------- |
+ | input | text |
+ | output | audio |
+
+ | Parameter | Position | Default | Requirement |
+ | ------------ | --------- | -------- | ----------- |
+ | **voice** | 0 | "Aoede" | `/^(?:Aoede|Heart|Puck|Fenrir)$/` |
+ | **language** | 1 | "en" | `/^en$/` |
+ | **speed** | 2 | 1.25 | 1.0...1.30 |
+
  ### Any-to-Any Nodes:

  - Node: **trace**<br/>
@@ -0,0 +1,16 @@
+ import SpeechFlowNode from "./speechflow-node";
+ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
+     static name: string;
+     private vad;
+     private queue;
+     private queueRecv;
+     private queueVAD;
+     private queueSend;
+     constructor(id: string, cfg: {
+         [id: string]: any;
+     }, opts: {
+         [id: string]: any;
+     }, args: any[]);
+     open(): Promise<void>;
+     close(): Promise<void>;
+ }
@@ -0,0 +1,431 @@
+ "use strict";
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+     if (k2 === undefined) k2 = k;
+     var desc = Object.getOwnPropertyDescriptor(m, k);
+     if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+         desc = { enumerable: true, get: function() { return m[k]; } };
+     }
+     Object.defineProperty(o, k2, desc);
+ }) : (function(o, m, k, k2) {
+     if (k2 === undefined) k2 = k;
+     o[k2] = m[k];
+ }));
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+     Object.defineProperty(o, "default", { enumerable: true, value: v });
+ }) : function(o, v) {
+     o["default"] = v;
+ });
+ var __importStar = (this && this.__importStar) || (function () {
+     var ownKeys = function(o) {
+         ownKeys = Object.getOwnPropertyNames || function (o) {
+             var ar = [];
+             for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+             return ar;
+         };
+         return ownKeys(o);
+     };
+     return function (mod) {
+         if (mod && mod.__esModule) return mod;
+         var result = {};
+         if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+         __setModuleDefault(result, mod);
+         return result;
+     };
+ })();
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ /* standard dependencies */
+ const node_events_1 = require("node:events");
+ const node_stream_1 = __importDefault(require("node:stream"));
+ /* external dependencies */
+ const wavefile = __importStar(require("wavefile"));
+ const vad_node_realtime_1 = require("@ericedouard/vad-node-realtime");
+ /* internal dependencies */
+ const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+ /* audio stream queue pointer */
+ class AudioQueuePointer extends node_events_1.EventEmitter {
+     name;
+     queue;
+     /* internal state */
+     index = 0;
+     /* construction */
+     constructor(name, queue) {
+         super();
+         this.name = name;
+         this.queue = queue;
+     }
+     /* positioning operations */
+     maxPosition() {
+         return this.queue.elements.length;
+     }
+     position(index) {
+         if (index !== undefined) {
+             this.index = index;
+             if (this.index < 0)
+                 this.index = 0;
+             else if (this.index >= this.queue.elements.length)
+                 this.index = this.queue.elements.length;
+             this.emit("position", this.index);
+         }
+         return this.index;
+     }
+     walk(num) {
+         if (num > 0) {
+             for (let i = 0; i < num && this.index < this.queue.elements.length; i++)
+                 this.index++;
+             this.emit("position", { start: this.index });
+         }
+         else if (num < 0) {
+             for (let i = 0; i < Math.abs(num) && this.index > 0; i++)
+                 this.index--;
+             this.emit("position", { start: this.index });
+         }
+     }
+     walkForwardUntil(type) {
+         while (this.index < this.queue.elements.length
+             && this.queue.elements[this.index].type !== type)
+             this.index++;
+         this.emit("position", { start: this.index });
+     }
+     walkBackwardUntil(type) {
+         while (this.index > 0
+             && this.queue.elements[this.index].type !== type)
+             this.index--;
+         this.emit("position", { start: this.index });
+     }
+     /* search operations */
+     searchForward(type) {
+         let position = this.index;
+         while (position < this.queue.elements.length
+             && this.queue.elements[position].type !== type)
+             position++;
+         this.emit("search", { start: this.index, end: position });
+         return position;
+     }
+     searchBackward(type) {
+         let position = this.index;
+         while (position > 0
+             && this.queue.elements[position].type !== type)
+             position--;
+         this.emit("search", { start: position, end: this.index });
+     }
+     /* reading operations */
+     peek(position) {
+         if (position === undefined)
+             position = this.index;
+         else {
+             if (position < 0)
+                 position = 0;
+             else if (position >= this.queue.elements.length)
+                 position = this.queue.elements.length;
+         }
+         const element = this.queue.elements[position];
+         this.queue.emit("read", { start: position, end: position });
+         return element;
+     }
+     read() {
+         const element = this.queue.elements[this.index];
+         if (this.index < this.queue.elements.length)
+             this.index++;
+         this.queue.emit("read", { start: this.index - 1, end: this.index - 1 });
+         return element;
+     }
+     slice(size) {
+         let slice;
+         const start = this.index;
+         if (size !== undefined) {
+             slice = this.queue.elements.slice(this.index, size);
+             this.index += size;
+         }
+         else {
+             slice = this.queue.elements.slice(this.index);
+             this.index = this.queue.elements.length;
+         }
+         this.queue.emit("read", { start, end: this.index });
+         return slice;
+     }
+     /* writing operations */
+     append(element) {
+         this.queue.elements.push(element);
+         this.index = this.queue.elements.length;
+         this.queue.emit("write", { start: this.index - 1, end: this.index - 1 });
+     }
+     insert(element) {
+         this.queue.elements.splice(this.index++, 0, element);
+         this.queue.emit("write", { start: this.index - 1, end: this.index });
+     }
+     delete() {
+         if (this.index >= this.queue.elements.length)
+             throw new Error("cannot delete after last element");
+         this.queue.elements.splice(this.index, 1);
+         this.queue.emit("write", { start: this.index, end: this.index });
+     }
+ }
+ /* audio stream queue */
+ class AudioQueue extends node_events_1.EventEmitter {
+     elements = [];
+     pointers = new Map();
+     pointerUse(name) {
+         if (!this.pointers.has(name))
+             this.pointers.set(name, new AudioQueuePointer(name, this));
+         return this.pointers.get(name);
+     }
+     pointerDelete(name) {
+         if (!this.pointers.has(name))
+             throw new Error("pointer not exists");
+         this.pointers.delete(name);
+     }
+     trim() {
+         /* determine minimum pointer position */
+         let min = this.elements.length;
+         for (const pointer of this.pointers.values())
+             if (min > pointer.position())
+                 min = pointer.position();
+         /* trim the maximum amount of first elements */
+         this.elements.splice(0, min);
+         /* shift all pointers */
+         for (const pointer of this.pointers.values())
+             pointer.position(pointer.position() - min);
+     }
+ }
+ /* SpeechFlow node for VAD speech-to-speech processing */
+ class SpeechFlowNodeVAD extends speechflow_node_1.default {
+     /* declare official node name */
+     static name = "vad";
+     /* internal state */
+     vad = null;
+     queue = new AudioQueue();
+     queueRecv = this.queue.pointerUse("recv");
+     queueVAD = this.queue.pointerUse("vad");
+     queueSend = this.queue.pointerUse("send");
+     /* construct node */
+     constructor(id, cfg, opts, args) {
+         super(id, cfg, opts, args);
+         /* declare node configuration parameters */
+         this.configure({});
+         /* declare node input/output format */
+         this.input = "audio";
+         this.output = "audio";
+     }
+     /* open node */
+     async open() {
+         /* sanity check situation */
+         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+             throw new Error("VAD node currently supports PCM-S16LE audio only");
+         /* pass-through logging */
+         const log = (level, msg) => { this.log(level, msg); };
+         /* internal processing constants */
+         const sampleRateTarget = 16000;
+         const samplesPerVADFrame = 512; /* required for VAD v5 */
+         const minFramesPerSecond = Math.trunc(sampleRateTarget / samplesPerVADFrame) + 1;
+         /* track audio queue element changes */
+         let speechActive = false;
+         let speechStart = -1;
+         let speechEnd = -1;
+         let speechMinSeconds = 2;
+         this.queue.on("write", () => {
+             if (!speechActive) {
+                 const position = this.queueSend.searchForward("speech-start");
+                 const element = this.queueSend.peek(position);
+                 if (element !== undefined && element.type === "speech-start") {
+                     this.queueSend.position(position + 1);
+                     speechActive = true;
+                     speechStart = this.queueSend.position();
+                     speechEnd = speechStart;
+                     speechMinSeconds = 2;
+                 }
+             }
+             else {
+                 speechEnd = this.queueSend.searchForward("speech-end");
+                 /* determine number of speech and fill frames */
+                 let framesSpeech = 0;
+                 for (let f = speechStart; f < speechEnd; f++) {
+                     const element = this.queueSend.peek(f);
+                     if (element.type === "audio-frame")
+                         framesSpeech++;
+                 }
+                 let framesFilled = minFramesPerSecond - framesSpeech;
+                 if (framesFilled < 0)
+                     framesFilled = 0;
+                 /* assemble all speech and fill frames */
+                 /*
+                 const assembleFrames = () => {
+                     const speech = new Float32Array((framesSpeech + framesFilled) * samplesPerVADFrame)
+                     let i = 0
+                     for (let f = speechStart; f < speechEnd; f++) {
+                         const element = this.queueSend.peek(f)
+                         if (element.type === "audio-frame")
+                             speech.set(element.data, samplesPerVADFrame * i++)
+                     }
+                     if (framesFilled > 0)
+                         speech.fill(0.0, i * samplesPerVADFrame, (i + framesFilled) * samplesPerVADFrame)
+                     return speech
+                 }
+                 */
+                 if (speechEnd === this.queueSend.maxPosition()) {
+                     /* intermediate transcription */
+                     const duration = ((framesSpeech + framesFilled) * samplesPerVADFrame) / sampleRateTarget;
+                     if (duration >= speechMinSeconds) {
+                         /* intermediate transcription of at least the next required minimum seconds */
+                         // const samples = assembleFrames()
+                         this.log("info", `trigger intermediate transcription (duration: ${duration.toFixed(1)}s)`);
+                         // this.tqueue!.enqueue({ id: speechStart, type: "intermediate", audio: samples, language: this.params.language })
+                         speechMinSeconds++;
+                     }
+                 }
+                 else {
+                     /* final transcription */
+                     const duration = ((framesSpeech + framesFilled) * samplesPerVADFrame) / sampleRateTarget;
+                     if (duration >= 1.0) {
+                         // const samples = assembleFrames()
+                         this.log("info", `trigger final transcription (duration: ${duration.toFixed(1)}s)`);
+                         // this.tqueue!.enqueue({ id: speechStart, type: "final", audio: samples, language: this.params.language })
+                         this.queueSend.position(speechEnd + 1);
+                     }
+                     else
+                         this.log("info", `skipping final transcription -- too short (duration: ${duration.toFixed(1)}s)`);
+                     speechActive = false;
+                 }
+             }
+         });
+         /* Voice Activity Detection (VAD) */
+         this.vad = await vad_node_realtime_1.RealTimeVAD.new({
+             onSpeechStart: () => {
+                 this.log("info", "VAD: speech start");
+                 this.queueVAD.insert({ type: "speech-start" });
+             },
+             onSpeechEnd: (audio) => {
+                 this.log("info", `VAD: speech end (samples: ${audio.length})`);
+                 this.queueVAD.insert({ type: "speech-end", short: false });
+             },
+             onVADMisfire: () => {
+                 this.log("info", "VAD: speech end (segment too short)");
+                 this.queueVAD.insert({ type: "speech-end", short: true });
+             },
+             onFrameProcessed: () => {
+                 this.queueVAD.walk(+1);
+             },
+             sampleRate: 16000,
+             model: "v5",
+             frameSamples: samplesPerVADFrame, /* (= 32ms: 512 frameSamples / 16000 sampleSize) */
+             positiveSpeechThreshold: 0.50,
+             negativeSpeechThreshold: 0.35,
+             minSpeechFrames: 4, /* (= 128ms: 4 x 512 frameSamples) */
+             redemptionFrames: 8, /* (= 256ms: 8 x 512 frameSamples) */
+             preSpeechPadFrames: 1, /* (= 32ms: 1 x 512 frameSamples) */
+         });
+         this.vad.start();
+         /* provide Duplex stream and internally attach to VAD */
+         const vad = this.vad;
+         const cfg = this.config;
+         const queueRecv = this.queueRecv;
+         const queueSend = this.queueSend;
+         let carrySamples = new Float32Array();
+         let endOfStream = false;
+         this.stream = new node_stream_1.default.Duplex({
+             writableObjectMode: true,
+             readableObjectMode: true,
+             decodeStrings: false,
+             /* receive audio samples */
+             write(chunk, encoding, callback) {
+                 if (!Buffer.isBuffer(chunk.payload))
+                     callback(new Error("expected audio input as Buffer chunks"));
+                 else if (chunk.payload.byteLength === 0)
+                     callback();
+                 else {
+                     /* convert audio samples from PCM/I16/48KHz to PCM/F32/16KHz */
+                     const bufferToInt16Array = (buf) => {
+                         const dataView = new DataView(buf.buffer);
+                         const result = new Int16Array(buf.length / 2);
+                         for (let i = 0; i < result.length; i++)
+                             result[i] = dataView.getInt16(i * 2, cfg.audioLittleEndian);
+                         return result;
+                     };
+                     const wav = new wavefile.WaveFile();
+                     wav.fromScratch(cfg.audioChannels, cfg.audioSampleRate, String(cfg.audioBitDepth), bufferToInt16Array(chunk.payload));
+                     wav.toBitDepth("32f");
+                     wav.toSampleRate(16000, { method: "cubic" });
+                     let data = wav.getSamples(false, Float32Array);
+                     /* merge previous carry samples */
+                     if (carrySamples.length > 0) {
+                         const merged = new Float32Array(carrySamples.length + data.length);
+                         merged.set(carrySamples);
+                         merged.set(data, carrySamples.length);
+                         data = merged;
+                         carrySamples = new Float32Array();
+                     }
+                     /* queue audio samples as individual VAD-sized frames
+                        and in parallel send it into the Voice Activity Detection (VAD) */
+                     const chunks = Math.trunc(data.length / samplesPerVADFrame);
+                     for (let i = 0; i < chunks; i++) {
+                         const frame = data.slice(i * samplesPerVADFrame, (i + 1) * samplesPerVADFrame);
+                         queueRecv.append({ type: "audio-frame", data: frame });
+                         vad.processAudio(frame);
+                     }
+                     /* remember new carry samples */
+                     const bulkLen = chunks * samplesPerVADFrame;
+                     carrySamples = data.slice(bulkLen);
+                     callback();
+                 }
+             },
+             /* send transcription texts */
+             read(size) {
+                 if (endOfStream)
+                     this.push(null);
+                 else {
+                     queueSend.once("write", (text) => {
+                         log("info", `VAD: receive data (${text.length} bytes)`);
+                         this.push(text, cfg.textEncoding);
+                     });
+                 }
+             },
+             /* react on end of input */
+             final(callback) {
+                 if (carrySamples.length > 0) {
+                     /* flush pending audio samples */
+                     if (carrySamples.length < samplesPerVADFrame) {
+                         const merged = new Float32Array(samplesPerVADFrame);
+                         merged.set(carrySamples);
+                         merged.fill(0.0, carrySamples.length, samplesPerVADFrame);
+                         carrySamples = merged;
+                     }
+                     queueRecv.append({ type: "audio-frame", data: carrySamples });
+                     vad.processAudio(carrySamples);
+                     /* give the processing a chance to still process the remaining samples */
+                     setTimeout(() => {
+                         endOfStream = true;
+                         this.push(null);
+                         callback();
+                     }, 2000);
+                 }
+                 else {
+                     endOfStream = true;
+                     this.push(null);
+                     callback();
+                 }
+             }
+         });
+     }
+     /* close node */
+     async close() {
+         /* close stream */
+         if (this.stream !== null) {
+             this.stream.destroy();
+             this.stream = null;
+         }
+         /* close VAD */
+         if (this.vad !== null) {
+             await this.vad.flush();
+             this.vad.destroy();
+             this.vad = null;
+         }
+     }
+ }
+ exports.default = SpeechFlowNodeVAD;
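The inline comments in the compiled VAD node above state the frame timings in milliseconds; they follow directly from the 512-sample frame size at 16 kHz, as this small check shows:

```
const sampleRate   = 16000
const frameSamples = 512
const frameMs      = (frameSamples / sampleRate) * 1000   /* 32 ms per VAD frame */

console.log(frameMs)       /* 32  ms (frameSamples: 512)      */
console.log(4 * frameMs)   /* 128 ms (minSpeechFrames: 4)     */
console.log(8 * frameMs)   /* 256 ms (redemptionFrames: 8)    */
console.log(1 * frameMs)   /* 32  ms (preSpeechPadFrames: 1)  */

/* minFramesPerSecond = trunc(16000 / 512) + 1 = 32 frames, i.e. about 1.02 s of audio,
   which is why short speech segments are padded with fill frames up to roughly one second */
console.log((Math.trunc(sampleRate / frameSamples) + 1) * frameMs)   /* 1024 ms */
```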
@@ -0,0 +1,13 @@
+ import SpeechFlowNode from "./speechflow-node";
+ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
+     static name: string;
+     private kokoro;
+     private static speexInitialized;
+     constructor(id: string, cfg: {
+         [id: string]: any;
+     }, opts: {
+         [id: string]: any;
+     }, args: any[]);
+     open(): Promise<void>;
+     close(): Promise<void>;
+ }
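The declaration above only exposes the Kokoro node's shape; its implementation is not included in this diff. Based on the compiled VAD node shown earlier, a SpeechFlow node subclass appears to follow a common pattern: declare parameters via `configure()`, declare the `input`/`output` payload types, assign `this.stream` in `open()`, and tear it down in `close()`. The following is a hypothetical skeleton of that pattern only, not the actual Kokoro code:

```
import Stream from "node:stream"
import SpeechFlowNode from "./speechflow-node"

/* hypothetical node skeleton following the pattern of the compiled VAD node above */
export default class SpeechFlowNodeExample extends SpeechFlowNode {
    constructor(id: string, cfg: { [id: string]: any }, opts: { [id: string]: any }, args: any[]) {
        super(id, cfg, opts, args)
        this.configure({})      /* no configuration parameters in this sketch */
        this.input  = "text"    /* consumes text chunks ...                   */
        this.output = "audio"   /* ... and produces audio chunks              */
    }
    async open(): Promise<void> {
        /* a real node would wrap its engine (e.g. a TTS model) in a Duplex stream here */
        this.stream = new Stream.PassThrough({ objectMode: true })
    }
    async close(): Promise<void> {
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
```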