npm - speechflow - Versions diffs - 0.9.4 → 0.9.7 - Mend

speechflow 0.9.4 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

package/CHANGELOG.md +19 -0
package/README.md +227 -54
package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
package/dst/speechflow-node-a2a-wav.d.ts +11 -0
package/dst/speechflow-node-a2a-wav.js +170 -0
package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
package/dst/speechflow-node-a2t-deepgram.js +220 -0
package/dst/speechflow-node-deepgram.d.ts +3 -1
package/dst/speechflow-node-deepgram.js +86 -22
package/dst/speechflow-node-deepl.d.ts +3 -1
package/dst/speechflow-node-deepl.js +25 -20
package/dst/speechflow-node-device.d.ts +3 -1
package/dst/speechflow-node-device.js +53 -2
package/dst/speechflow-node-elevenlabs.d.ts +4 -1
package/dst/speechflow-node-elevenlabs.js +88 -49
package/dst/speechflow-node-ffmpeg.d.ts +3 -1
package/dst/speechflow-node-ffmpeg.js +42 -4
package/dst/speechflow-node-file.d.ts +3 -1
package/dst/speechflow-node-file.js +84 -13
package/dst/speechflow-node-format.d.ts +11 -0
package/dst/speechflow-node-format.js +80 -0
package/dst/speechflow-node-gemma.d.ts +3 -1
package/dst/speechflow-node-gemma.js +84 -23
package/dst/speechflow-node-mqtt.d.ts +13 -0
package/dst/speechflow-node-mqtt.js +181 -0
package/dst/speechflow-node-opus.d.ts +12 -0
package/dst/speechflow-node-opus.js +135 -0
package/dst/speechflow-node-subtitle.d.ts +12 -0
package/dst/speechflow-node-subtitle.js +96 -0
package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
package/dst/speechflow-node-t2t-deepl.js +133 -0
package/dst/speechflow-node-t2t-format.d.ts +11 -0
package/dst/speechflow-node-t2t-format.js +80 -0
package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
package/dst/speechflow-node-t2t-gemma.js +213 -0
package/dst/speechflow-node-t2t-opus.d.ts +12 -0
package/dst/speechflow-node-t2t-opus.js +135 -0
package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
package/dst/speechflow-node-t2t-subtitle.js +96 -0
package/dst/speechflow-node-trace.d.ts +11 -0
package/dst/speechflow-node-trace.js +88 -0
package/dst/speechflow-node-wav.d.ts +11 -0
package/dst/speechflow-node-wav.js +170 -0
package/dst/speechflow-node-websocket.d.ts +3 -1
package/dst/speechflow-node-websocket.js +149 -49
package/dst/speechflow-node-whisper-common.d.ts +34 -0
package/dst/speechflow-node-whisper-common.js +7 -0
package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
package/dst/speechflow-node-whisper-ggml.js +97 -0
package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
package/dst/speechflow-node-whisper-onnx.js +131 -0
package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
package/dst/speechflow-node-whisper-worker.d.ts +1 -0
package/dst/speechflow-node-whisper-worker.js +116 -0
package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
package/dst/speechflow-node-whisper-worker2.js +82 -0
package/dst/speechflow-node-whisper.d.ts +19 -0
package/dst/speechflow-node-whisper.js +604 -0
package/dst/speechflow-node-x2x-trace.d.ts +11 -0
package/dst/speechflow-node-x2x-trace.js +88 -0
package/dst/speechflow-node-xio-device.d.ts +13 -0
package/dst/speechflow-node-xio-device.js +205 -0
package/dst/speechflow-node-xio-file.d.ts +11 -0
package/dst/speechflow-node-xio-file.js +176 -0
package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
package/dst/speechflow-node-xio-mqtt.js +181 -0
package/dst/speechflow-node-xio-websocket.d.ts +13 -0
package/dst/speechflow-node-xio-websocket.js +275 -0
package/dst/speechflow-node.d.ts +25 -7
package/dst/speechflow-node.js +74 -9
package/dst/speechflow-utils.d.ts +23 -0
package/dst/speechflow-utils.js +194 -0
package/dst/speechflow.js +146 -43
package/etc/biome.jsonc +12 -4
package/etc/stx.conf +65 -0
package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
package/package.json +49 -31
package/sample.yaml +61 -23
package/src/lib.d.ts +6 -1
package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
package/src/speechflow-node-a2a-wav.ts +143 -0
package/src/speechflow-node-a2t-deepgram.ts +199 -0
package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
package/src/speechflow-node-t2t-format.ts +85 -0
package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
package/src/speechflow-node-t2t-opus.ts +111 -0
package/src/speechflow-node-t2t-subtitle.ts +101 -0
package/src/speechflow-node-x2x-trace.ts +92 -0
package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
package/src/speechflow-node-xio-file.ts +153 -0
package/src/speechflow-node-xio-mqtt.ts +154 -0
package/src/speechflow-node-xio-websocket.ts +248 -0
package/src/speechflow-node.ts +78 -13
package/src/speechflow-utils.ts +212 -0
package/src/speechflow.ts +150 -43
package/etc/nps.yaml +0 -40
package/src/speechflow-node-deepgram.ts +0 -133
package/src/speechflow-node-elevenlabs.ts +0 -116
package/src/speechflow-node-file.ts +0 -108
package/src/speechflow-node-websocket.ts +0 -179

package/dst/speechflow-node-a2a-wav.js ADDED Viewed

@@ -0,0 +1,170 @@
+"use strict";
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { /*  module interop helper (presumably compiler-emitted): expose m[k] on o under the name k2; an already existing this.__createBinding is reused  */
+    if (k2 === undefined) k2 = k; /*  k2 defaults to k  */
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { /*  no descriptor, an accessor on a module not flagged __esModule, or a writable/configurable data property: use a forwarding getter instead  */
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { /*  variant used when Object.create is not available: plain assignment  */
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { /*  module interop helper: store v as the enumerable "default" property of o  */
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) { /*  variant used when Object.create is not available: plain assignment  */
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { /*  module interop helper: turn a required module into a namespace-like object  */
+    var ownKeys = function(o) { /*  replaces itself on first call with Object.getOwnPropertyNames or a for-in based fallback  */
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; /*  modules flagged __esModule are returned unchanged  */
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); /*  bind every own property except "default"  */
+        __setModuleDefault(result, mod); /*  the module itself becomes the "default" property  */
+        return result;
+    };
+})();
+var __importDefault = (this && this.__importDefault) || function (mod) { /*  module interop helper: return mod when flagged __esModule, otherwise wrap it as { default: mod }  */
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+/*  standard dependencies  */
+const node_stream_1 = __importDefault(require("node:stream"));
+/*  external dependencies  */
+const wav_1 = __importDefault(require("wav"));
+/*  internal dependencies  */
+const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+const utils = __importStar(require("./speechflow-utils"));
+/*  utility class for wrapping a custom stream into a regular Transform stream  */
+class StreamWrapper extends node_stream_1.default.Transform {
+    foreignStream;
+    constructor(foreignStream, options = {}) {
+        options.readableObjectMode = true;
+        options.writableObjectMode = true;
+        super(options);
+        this.foreignStream = foreignStream;
+        this.foreignStream.on("data", (chunk) => {
+            this.push(chunk);
+        });
+        this.foreignStream.on("error", (err) => {
+            this.emit("error", err);
+        });
+        this.foreignStream.on("end", () => {
+            this.push(null);
+        });
+    }
+    _transform(chunk, encoding, callback) {
+        try {
+            const canContinue = this.foreignStream.write(chunk);
+            if (canContinue)
+                callback();
+            else
+                this.foreignStream.once("drain", callback);
+        }
+        catch (err) {
+            callback(err);
+        }
+    }
+    _flush(callback) {
+        try {
+            if (typeof this.foreignStream.end === "function")
+                this.foreignStream.end();
+            callback();
+        }
+        catch (err) {
+            callback(err);
+        }
+    }
+}
+/*  SpeechFlow node for WAV format conversion  */
+class SpeechFlowNodeWAV extends speechflow_node_1.default {
+    /*  declare official node name  */
+    static name = "wav";
+    /*  construct node  */
+    constructor(id, cfg, opts, args) {
+        super(id, cfg, opts, args);
+        /*  declare node configuration parameters  */
+        this.configure({
+            mode: { type: "string", pos: 1, val: "encode", match: /^(?:encode|decode)$/ }
+        });
+        /*  declare node input/output format  */
+        this.input = "audio";
+        this.output = "audio";
+    }
+    /*  open node  */
+    async open() {
+        if (this.params.mode === "encode") {
+            /*  convert raw/PCM to WAV/PCM  */
+            /*  NOTICE: as this is a continuous stream, the resulting WAV header is not 100%
+                conforming to the WAV standard, as it has to use a zero duration information.
+                This cannot be changed in a stream-based processing.  */
+            const writer = new wav_1.default.Writer({
+                format: 0x0001 /* PCM */,
+                channels: this.config.audioChannels,
+                sampleRate: this.config.audioSampleRate,
+                bitDepth: this.config.audioBitDepth
+            });
+            this.stream = new StreamWrapper(writer);
+        }
+        else if (this.params.mode === "decode") {
+            /*  convert WAV/PCM to raw/PCM  */
+            const reader = new wav_1.default.Reader();
+            reader.on("format", (format) => {
+                this.log("info", `WAV audio stream: format=${format.audioFormat === 0x0001 ? "PCM" :
+                    "0x" + format.audioFormat.toString(16).padStart(4, "0")} ` +
+                    `bitDepth=${format.bitDepth} ` +
+                    `signed=${format.signed ? "yes" : "no"} ` +
+                    `endian=${format.endianness} ` +
+                    `sampleRate=${format.sampleRate} ` +
+                    `channels=${format.channels}`);
+                if (format.audioFormat !== 0x0001 /* PCM */)
+                    throw new Error("WAV not based on PCM format");
+                if (format.bitDepth !== 16)
+                    throw new Error("WAV not based on 16 bit samples");
+                if (!format.signed)
+                    throw new Error("WAV not based on signed integers");
+                if (format.endianness !== "LE")
+                    throw new Error("WAV not based on little endianness");
+                if (format.sampleRate !== 48000)
+                    throw new Error("WAV not based on 48Khz sample rate");
+                if (format.channels !== 1)
+                    throw new Error("WAV not based on mono channel");
+            });
+            this.stream = new StreamWrapper(reader);
+        }
+        else
+            throw new Error(`invalid operation mode "${this.params.mode}"`);
+        /*  convert regular stream into object-mode stream  */
+        const wrapper1 = utils.createTransformStreamForWritableSide();
+        const wrapper2 = utils.createTransformStreamForReadableSide("audio", () => this.timeZero);
+        this.stream = node_stream_1.default.compose(wrapper1, this.stream, wrapper2);
+    }
+    /*  close node  */
+    async close() {
+        /*  shutdown stream  */
+        if (this.stream !== null) {
+            await new Promise((resolve) => {
+                if (this.stream instanceof node_stream_1.default.Duplex)
+                    this.stream.end(() => { resolve(); });
+                else
+                    resolve();
+            });
+            this.stream.destroy();
+            this.stream = null;
+        }
+    }
+}
+exports.default = SpeechFlowNodeWAV;

package/dst/speechflow-node-a2t-deepgram.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeDeepgram extends SpeechFlowNode { /*  type declaration of the SpeechFlowNodeDeepgram class, a SpeechFlowNode subclass  */
+    static name: string;
+    private dg; /*  private member, declared without a type  */
+    constructor(id: string, cfg: { /*  cfg and opts are string-keyed maps of arbitrary values, args is an array of arbitrary values  */
+        [id: string]: any;
+    }, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    open(): Promise<void>; /*  asynchronous, resolves without a value  */
+    close(): Promise<void>; /*  asynchronous, resolves without a value  */
+}

package/dst/speechflow-node-a2t-deepgram.js ADDED Viewed

@@ -0,0 +1,220 @@
+"use strict";
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { /*  module interop helper (presumably compiler-emitted): expose m[k] on o under the name k2; an already existing this.__createBinding is reused  */
+    if (k2 === undefined) k2 = k; /*  k2 defaults to k  */
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { /*  no descriptor, an accessor on a module not flagged __esModule, or a writable/configurable data property: use a forwarding getter instead  */
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { /*  variant used when Object.create is not available: plain assignment  */
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { /*  module interop helper: store v as the enumerable "default" property of o  */
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) { /*  variant used when Object.create is not available: plain assignment  */
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { /*  module interop helper: turn a required module into a namespace-like object  */
+    var ownKeys = function(o) { /*  replaces itself on first call with Object.getOwnPropertyNames or a for-in based fallback  */
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; /*  modules flagged __esModule are returned unchanged  */
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); /*  bind every own property except "default"  */
+        __setModuleDefault(result, mod); /*  the module itself becomes the "default" property  */
+        return result;
+    };
+})();
+var __importDefault = (this && this.__importDefault) || function (mod) { /*  module interop helper: return mod when flagged __esModule, otherwise wrap it as { default: mod }  */
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+/*  standard dependencies  */
+const node_stream_1 = __importDefault(require("node:stream"));
+/*  external dependencies  */
+const Deepgram = __importStar(require("@deepgram/sdk"));
+const luxon_1 = require("luxon");
+/*  internal dependencies  */
+const speechflow_node_1 = __importStar(require("./speechflow-node"));
+const utils = __importStar(require("./speechflow-utils"));
+/*  SpeechFlow node for Deepgram speech-to-text conversion  */
+class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
+    /*  declare official node name  */
+    static name = "deepgram";
+    /*  internal state  */
+    dg = null;
+    /*  construct node  */
+    constructor(id, cfg, opts, args) {
+        super(id, cfg, opts, args);
+        /*  declare node configuration parameters  */
+        this.configure({
+            key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
+            model: { type: "string", val: "nova-3", pos: 0 },
+            version: { type: "string", val: "latest", pos: 1 },
+            language: { type: "string", val: "multi", pos: 2 }
+        });
+        /*  declare node input/output format  */
+        this.input = "audio";
+        this.output = "text";
+    }
+    /*  open node  */
+    async open() {
+        /*  sanity check situation  */
+        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+            throw new Error("Deepgram node currently supports PCM-S16LE audio only");
+        /*  create queue for results  */
+        const queue = new utils.SingleQueue();
+        /*  connect to Deepgram API  */
+        const deepgram = Deepgram.createClient(this.params.key);
+        let language = "en";
+        if (this.params.model.match(/^nova-2/) && this.params.language !== "en")
+            language = this.params.language;
+        else if (this.params.model.match(/^nova-3/) && this.params.language !== "en")
+            language = "multi";
+        this.dg = deepgram.listen.live({
+            mip_opt_out: true,
+            model: this.params.model,
+            version: this.params.version,
+            language,
+            channels: this.config.audioChannels,
+            sample_rate: this.config.audioSampleRate,
+            encoding: "linear16",
+            multichannel: false,
+            endpointing: 10,
+            interim_results: false,
+            smart_format: true,
+            punctuate: true,
+            filler_words: true,
+            diarize: true, /* still not used by us */
+            numerals: true,
+            profanity_filter: false
+        });
+        /*  hook onto Deepgram API events  */
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
+            const text = data.channel?.alternatives[0].transcript ?? "";
+            if (text === "")
+                this.log("info", `Deepgram: empty/dummy text received (start: ${data.start}s, duration: ${data.duration}s)`);
+            else {
+                this.log("info", `Deepgram: text received (start: ${data.start}s, duration: ${data.duration}s): "${text}"`);
+                const start = luxon_1.Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset);
+                const end = start.plus({ seconds: data.duration });
+                const chunk = new speechflow_node_1.SpeechFlowChunk(start, end, "final", "text", text);
+                queue.write(chunk);
+            }
+        });
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
+            this.log("info", "Deepgram: metadata received");
+        });
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
+            this.log("info", "Deepgram: connection close");
+        });
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error) => {
+            this.log("error", `Deepgram: ${error.message}`);
+            this.emit("error");
+        });
+        /*  wait for Deepgram API to be available  */
+        await new Promise((resolve, reject) => {
+            let timer = setTimeout(() => {
+                if (timer !== null) {
+                    timer = null;
+                    reject(new Error("Deepgram: timeout waiting for connection open"));
+                }
+            }, 3000);
+            this.dg.once(Deepgram.LiveTranscriptionEvents.Open, () => {
+                this.log("info", "Deepgram: connection open");
+                if (timer !== null) {
+                    clearTimeout(timer);
+                    timer = null;
+                }
+                resolve(true);
+            });
+        });
+        /*  remember opening time to receive time zero offset  */
+        this.timeOpen = luxon_1.DateTime.now();
+        /*  workaround Deepgram initialization problems  */
+        let initDone = false;
+        let initTimeout = null;
+        const initTimeoutStart = () => {
+            if (initDone)
+                return;
+            setTimeout(async () => {
+                if (initTimeout === null)
+                    return;
+                initTimeout = null;
+                this.log("warning", "Deepgram: initialization timeout -- restarting service usage");
+                await this.close();
+                this.open();
+            }, 3000);
+        };
+        const initTimeoutStop = () => {
+            if (initDone)
+                return;
+            initDone = true;
+            if (initTimeout !== null) {
+                clearTimeout(initTimeout);
+                initTimeout = null;
+            }
+        };
+        /*  provide Duplex stream and internally attach to Deepgram API  */
+        const dg = this.dg;
+        const log = (level, msg) => {
+            this.log(level, msg);
+        };
+        const encoding = this.config.textEncoding;
+        this.stream = new node_stream_1.default.Duplex({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            write(chunk, encoding, callback) {
+                if (chunk.type !== "audio")
+                    callback(new Error("expected audio input chunk"));
+                else if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("expected Buffer input chunk"));
+                else {
+                    if (chunk.payload.byteLength > 0) {
+                        log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`);
+                        initTimeoutStart();
+                        dg.send(chunk.payload); /* intentionally discard all time information  */
+                    }
+                    callback();
+                }
+            },
+            read(size) {
+                queue.read().then((chunk) => {
+                    log("info", `Deepgram: receive data (${chunk.payload.length} bytes)`);
+                    initTimeoutStop();
+                    this.push(chunk, encoding);
+                });
+            },
+            final(callback) {
+                dg.requestClose();
+                this.push(null);
+                callback();
+            }
+        });
+    }
+    /*  close node  */
+    async close() {
+        /*  close stream  */
+        if (this.stream !== null) {
+            this.stream.destroy();
+            this.stream = null;
+        }
+        /*  shutdown Deepgram API  */
+        if (this.dg !== null)
+            this.dg.requestClose();
+    }
+}
+exports.default = SpeechFlowNodeDeepgram;

package/dst/speechflow-node-deepgram.d.ts CHANGED Viewed

@@ -2,7 +2,9 @@ import SpeechFlowNode from "./speechflow-node";
 export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
     static name: string;
     private dg;
-    constructor(id: string, opts: {
+    constructor(id: string, cfg: {
+        [id: string]: any;
+    }, opts: {
         [id: string]: any;
     }, args: any[]);
     open(): Promise<void>;

package/dst/speechflow-node-deepgram.js CHANGED Viewed

@@ -42,12 +42,13 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 /*  standard dependencies  */
-const node_events_1 = require("node:events");
-/*  external dependencies  */
 const node_stream_1 = __importDefault(require("node:stream"));
+/*  external dependencies  */
 const Deepgram = __importStar(require("@deepgram/sdk"));
+const luxon_1 = require("luxon");
 /*  internal dependencies  */
-const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+const speechflow_node_1 = __importStar(require("./speechflow-node"));
+const utils = __importStar(require("./speechflow-utils"));
 /*  SpeechFlow node for Deepgram speech-to-text conversion  */
 class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
     /*  declare official node name  */
@@ -55,8 +56,8 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
     /*  internal state  */
     dg = null;
     /*  construct node  */
-    constructor(id, opts, args) {
-        super(id, opts, args);
+    constructor(id, cfg, opts, args) {
+        super(id, cfg, opts, args);
         /*  declare node configuration parameters  */
         this.configure({
             key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
@@ -74,13 +75,19 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
             throw new Error("Deepgram node currently supports PCM-S16LE audio only");
         /*  create queue for results  */
-        const queue = new node_events_1.EventEmitter();
+        const queue = new utils.SingleQueue();
         /*  connect to Deepgram API  */
         const deepgram = Deepgram.createClient(this.params.key);
+        let language = "en";
+        if (this.params.model.match(/^nova-2/) && this.params.language !== "en")
+            language = this.params.language;
+        else if (this.params.model.match(/^nova-3/) && this.params.language !== "en")
+            language = "multi";
         this.dg = deepgram.listen.live({
+            mip_opt_out: true,
             model: this.params.model,
             version: this.params.version,
-            language: this.params.language,
+            language,
             channels: this.config.audioChannels,
             sample_rate: this.config.audioSampleRate,
             encoding: "linear16",
@@ -90,18 +97,22 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
             smart_format: true,
             punctuate: true,
             filler_words: true,
-            diarize: true,
+            diarize: true, /* still not used by us */
             numerals: true,
-            paragraphs: true,
-            profanity_filter: true,
-            utterances: false
+            profanity_filter: false
         });
         /*  hook onto Deepgram API events  */
         this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
             const text = data.channel?.alternatives[0].transcript ?? "";
             if (text === "")
-                return;
-            queue.emit("text", text);
+                this.log("info", `Deepgram: empty/dummy text received (start: ${data.start}s, duration: ${data.duration}s)`);
+            else {
+                this.log("info", `Deepgram: text received (start: ${data.start}s, duration: ${data.duration}s): "${text}"`);
+                const start = luxon_1.Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset);
+                const end = start.plus({ seconds: data.duration });
+                const chunk = new speechflow_node_1.SpeechFlowChunk(start, end, "final", "text", text);
+                queue.write(chunk);
+            }
         });
         this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
             this.log("info", "Deepgram: metadata received");
@@ -114,30 +125,83 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
             this.emit("error");
         });
         /*  wait for Deepgram API to be available  */
-        await new Promise((resolve) => {
+        await new Promise((resolve, reject) => {
+            let timer = setTimeout(() => {
+                if (timer !== null) {
+                    timer = null;
+                    reject(new Error("Deepgram: timeout waiting for connection open"));
+                }
+            }, 3000);
             this.dg.once(Deepgram.LiveTranscriptionEvents.Open, () => {
                 this.log("info", "Deepgram: connection open");
+                if (timer !== null) {
+                    clearTimeout(timer);
+                    timer = null;
+                }
                 resolve(true);
             });
         });
+        /*  remember opening time to receive time zero offset  */
+        this.timeOpen = luxon_1.DateTime.now();
+        /*  workaround Deepgram initialization problems  */
+        let initDone = false;
+        let initTimeout = null;
+        const initTimeoutStart = () => {
+            if (initDone)
+                return;
+            setTimeout(async () => {
+                if (initTimeout === null)
+                    return;
+                initTimeout = null;
+                this.log("warning", "Deepgram: initialization timeout -- restarting service usage");
+                await this.close();
+                this.open();
+            }, 3000);
+        };
+        const initTimeoutStop = () => {
+            if (initDone)
+                return;
+            initDone = true;
+            if (initTimeout !== null) {
+                clearTimeout(initTimeout);
+                initTimeout = null;
+            }
+        };
         /*  provide Duplex stream and internally attach to Deepgram API  */
         const dg = this.dg;
+        const log = (level, msg) => {
+            this.log(level, msg);
+        };
+        const encoding = this.config.textEncoding;
         this.stream = new node_stream_1.default.Duplex({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
             write(chunk, encoding, callback) {
-                const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength);
-                if (data.byteLength === 0)
-                    queue.emit("text", "");
-                else
-                    dg.send(data);
-                callback();
+                if (chunk.type !== "audio")
+                    callback(new Error("expected audio input chunk"));
+                else if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("expected Buffer input chunk"));
+                else {
+                    if (chunk.payload.byteLength > 0) {
+                        log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`);
+                        initTimeoutStart();
+                        dg.send(chunk.payload); /* intentionally discard all time information  */
+                    }
+                    callback();
+                }
             },
             read(size) {
-                queue.once("text", (text) => {
-                    this.push(text);
+                queue.read().then((chunk) => {
+                    log("info", `Deepgram: receive data (${chunk.payload.length} bytes)`);
+                    initTimeoutStop();
+                    this.push(chunk, encoding);
                 });
             },
             final(callback) {
                 dg.requestClose();
+                this.push(null);
+                callback();
             }
         });
     }

package/dst/speechflow-node-deepl.d.ts CHANGED Viewed

@@ -2,7 +2,9 @@ import SpeechFlowNode from "./speechflow-node";
 export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
     static name: string;
     private deepl;
-    constructor(id: string, opts: {
+    constructor(id: string, cfg: {
+        [id: string]: any;
+    }, opts: {
         [id: string]: any;
     }, args: any[]);
     open(): Promise<void>;

package/dst/speechflow-node-deepl.js CHANGED Viewed

@@ -43,7 +43,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 /*  standard dependencies  */
 const node_stream_1 = __importDefault(require("node:stream"));
-const node_events_1 = require("node:events");
 /*  external dependencies  */
 const DeepL = __importStar(require("deepl-node"));
 /*  internal dependencies  */
@@ -55,8 +54,8 @@ class SpeechFlowNodeDeepL extends speechflow_node_1.default {
     /*  internal state  */
     deepl = null;
     /*  construct node  */
-    constructor(id, opts, args) {
-        super(id, opts, args);
+    constructor(id, cfg, opts, args) {
+        super(id, cfg, opts, args);
         /*  declare node configuration parameters  */
         this.configure({
             key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPL },
@@ -84,27 +83,33 @@ class SpeechFlowNodeDeepL extends speechflow_node_1.default {
             return (result?.text ?? text);
         };
         /*  establish a duplex stream and connect it to DeepL translation  */
-        const queue = new node_events_1.EventEmitter();
-        this.stream = new node_stream_1.default.Duplex({
-            write(chunk, encoding, callback) {
-                const data = chunk.toString();
-                if (data === "") {
-                    queue.emit("result", "");
-                    callback();
-                }
+        this.stream = new node_stream_1.default.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform(chunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"));
                 else {
-                    translate(data).then((result) => {
-                        queue.emit("result", result);
+                    if (chunk.payload === "") {
+                        this.push(chunk);
                         callback();
-                    }).catch((err) => {
-                        callback(err);
-                    });
+                    }
+                    else {
+                        translate(chunk.payload).then((payload) => {
+                            const chunkNew = chunk.clone();
+                            chunkNew.payload = payload;
+                            this.push(chunkNew);
+                            callback();
+                        }).catch((err) => {
+                            callback(err);
+                        });
+                    }
                 }
             },
-            read(size) {
-                queue.once("result", (result) => {
-                    this.push(result);
-                });
+            final(callback) {
+                this.push(null);
+                callback();
             }
         });
     }