speechflow 0.9.4 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +227 -54
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
- package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
- package/dst/speechflow-node-a2a-wav.d.ts +11 -0
- package/dst/speechflow-node-a2a-wav.js +170 -0
- package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
- package/dst/speechflow-node-a2t-deepgram.js +220 -0
- package/dst/speechflow-node-deepgram.d.ts +3 -1
- package/dst/speechflow-node-deepgram.js +86 -22
- package/dst/speechflow-node-deepl.d.ts +3 -1
- package/dst/speechflow-node-deepl.js +25 -20
- package/dst/speechflow-node-device.d.ts +3 -1
- package/dst/speechflow-node-device.js +53 -2
- package/dst/speechflow-node-elevenlabs.d.ts +4 -1
- package/dst/speechflow-node-elevenlabs.js +88 -49
- package/dst/speechflow-node-ffmpeg.d.ts +3 -1
- package/dst/speechflow-node-ffmpeg.js +42 -4
- package/dst/speechflow-node-file.d.ts +3 -1
- package/dst/speechflow-node-file.js +84 -13
- package/dst/speechflow-node-format.d.ts +11 -0
- package/dst/speechflow-node-format.js +80 -0
- package/dst/speechflow-node-gemma.d.ts +3 -1
- package/dst/speechflow-node-gemma.js +84 -23
- package/dst/speechflow-node-mqtt.d.ts +13 -0
- package/dst/speechflow-node-mqtt.js +181 -0
- package/dst/speechflow-node-opus.d.ts +12 -0
- package/dst/speechflow-node-opus.js +135 -0
- package/dst/speechflow-node-subtitle.d.ts +12 -0
- package/dst/speechflow-node-subtitle.js +96 -0
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
- package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
- package/dst/speechflow-node-t2t-deepl.js +133 -0
- package/dst/speechflow-node-t2t-format.d.ts +11 -0
- package/dst/speechflow-node-t2t-format.js +80 -0
- package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
- package/dst/speechflow-node-t2t-gemma.js +213 -0
- package/dst/speechflow-node-t2t-opus.d.ts +12 -0
- package/dst/speechflow-node-t2t-opus.js +135 -0
- package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
- package/dst/speechflow-node-t2t-subtitle.js +96 -0
- package/dst/speechflow-node-trace.d.ts +11 -0
- package/dst/speechflow-node-trace.js +88 -0
- package/dst/speechflow-node-wav.d.ts +11 -0
- package/dst/speechflow-node-wav.js +170 -0
- package/dst/speechflow-node-websocket.d.ts +3 -1
- package/dst/speechflow-node-websocket.js +149 -49
- package/dst/speechflow-node-whisper-common.d.ts +34 -0
- package/dst/speechflow-node-whisper-common.js +7 -0
- package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-ggml.js +97 -0
- package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
- package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker.js +116 -0
- package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker2.js +82 -0
- package/dst/speechflow-node-whisper.d.ts +19 -0
- package/dst/speechflow-node-whisper.js +604 -0
- package/dst/speechflow-node-x2x-trace.d.ts +11 -0
- package/dst/speechflow-node-x2x-trace.js +88 -0
- package/dst/speechflow-node-xio-device.d.ts +13 -0
- package/dst/speechflow-node-xio-device.js +205 -0
- package/dst/speechflow-node-xio-file.d.ts +11 -0
- package/dst/speechflow-node-xio-file.js +176 -0
- package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
- package/dst/speechflow-node-xio-mqtt.js +181 -0
- package/dst/speechflow-node-xio-websocket.d.ts +13 -0
- package/dst/speechflow-node-xio-websocket.js +275 -0
- package/dst/speechflow-node.d.ts +25 -7
- package/dst/speechflow-node.js +74 -9
- package/dst/speechflow-utils.d.ts +23 -0
- package/dst/speechflow-utils.js +194 -0
- package/dst/speechflow.js +146 -43
- package/etc/biome.jsonc +12 -4
- package/etc/stx.conf +65 -0
- package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
- package/package.json +49 -31
- package/sample.yaml +61 -23
- package/src/lib.d.ts +6 -1
- package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
- package/src/speechflow-node-a2a-wav.ts +143 -0
- package/src/speechflow-node-a2t-deepgram.ts +199 -0
- package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
- package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
- package/src/speechflow-node-t2t-format.ts +85 -0
- package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
- package/src/speechflow-node-t2t-opus.ts +111 -0
- package/src/speechflow-node-t2t-subtitle.ts +101 -0
- package/src/speechflow-node-x2x-trace.ts +92 -0
- package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
- package/src/speechflow-node-xio-file.ts +153 -0
- package/src/speechflow-node-xio-mqtt.ts +154 -0
- package/src/speechflow-node-xio-websocket.ts +248 -0
- package/src/speechflow-node.ts +78 -13
- package/src/speechflow-utils.ts +212 -0
- package/src/speechflow.ts +150 -43
- package/etc/nps.yaml +0 -40
- package/src/speechflow-node-deepgram.ts +0 -133
- package/src/speechflow-node-elevenlabs.ts +0 -116
- package/src/speechflow-node-file.ts +0 -108
- package/src/speechflow-node-websocket.ts +0 -179
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*
|
|
3
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
4
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
5
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
6
|
+
*/
|
|
7
|
+
/* TypeScript interop helper: re-export property "key" of "source" on "target"
   (optionally under the name "alias"), as a live binding where possible */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        /* fall back to a forwarding getter if the property is missing, is an accessor
           on a non-ES module, or is a data property which could still change */
        const needsGetter = !descriptor
            || ("get" in descriptor ? !source.__esModule : (descriptor.writable || descriptor.configurable));
        if (needsGetter)
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        /* legacy engines: plain value copy */
        target[alias === undefined ? key : alias] = source[key];
    });
|
|
18
|
+
/* TypeScript interop helper: attach "value" as the "default" export of "target" */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        /* legacy engines: plain assignment */
        target["default"] = value;
    });
|
|
23
|
+
/* TypeScript interop helper: emulate "import * as ns" for CommonJS modules */
var __importStar = (this && this.__importStar) || (function () {
    /* lazily select the key enumeration strategy on first use */
    let listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (candidate) {
            const names = [];
            for (const name in candidate)
                if (Object.prototype.hasOwnProperty.call(candidate, name))
                    names.push(name);
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        /* genuine ES modules are passed through untouched */
        if (mod && mod.__esModule)
            return mod;
        /* otherwise build a namespace object: all own keys except "default"
           are re-exported, and the module itself becomes the default export */
        const namespace = {};
        if (mod != null) {
            for (const key of listOwnKeys(mod))
                if (key !== "default")
                    __createBinding(namespace, mod, key);
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
40
|
+
/* TypeScript interop helper: emulate "import x from" for CommonJS modules */
var __importDefault = (this && this.__importDefault) || function (mod) {
    /* ES modules already carry their own "default" export */
    if (mod && mod.__esModule)
        return mod;
    return { "default": mod };
};
|
|
43
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
|
+
/* standard dependencies */
|
|
45
|
+
const node_stream_1 = __importDefault(require("node:stream"));
|
|
46
|
+
/* external dependencies */
|
|
47
|
+
const wav_1 = __importDefault(require("wav"));
|
|
48
|
+
/* internal dependencies */
|
|
49
|
+
const speechflow_node_1 = __importDefault(require("./speechflow-node"));
|
|
50
|
+
const utils = __importStar(require("./speechflow-utils"));
|
|
51
|
+
/* utility class for wrapping a custom stream into a regular Transform stream */
|
|
52
|
+
class StreamWrapper extends node_stream_1.default.Transform {
    /* the wrapped foreign stream (written to in _transform, read from via its "data" events) */
    foreignStream;
    /*  Wrap a foreign duplex-like stream into an object-mode Transform stream.
        - foreignStream: stream providing write()/end() and emitting "data"/"error"/"end"
        - options:       optional Transform options; object mode is always enforced
                         on both sides (the passed object itself is NOT modified)  */
    constructor(foreignStream, options = {}) {
        super({ ...options, readableObjectMode: true, writableObjectMode: true });
        this.foreignStream = foreignStream;
        /* forward everything the foreign stream produces to our readable side */
        this.foreignStream.on("data", (chunk) => {
            this.push(chunk);
        });
        this.foreignStream.on("error", (err) => {
            this.emit("error", err);
        });
        this.foreignStream.on("end", () => {
            this.push(null);
        });
    }
    /* feed a chunk into the foreign stream, honoring its backpressure */
    _transform(chunk, encoding, callback) {
        try {
            const canContinue = this.foreignStream.write(chunk);
            if (canContinue)
                callback();
            else
                this.foreignStream.once("drain", callback);
        }
        catch (err) {
            callback(err);
        }
    }
    /*  End the foreign stream and wait until it has delivered all of its output.
        Calling back immediately would let the Transform push EOF while the foreign
        stream may still emit "data", which results in a "push after EOF" error.  */
    _flush(callback) {
        const foreign = this.foreignStream;
        /* nothing to end: complete immediately */
        if (typeof foreign.end !== "function") {
            callback();
            return;
        }
        /* foreign readable side already ended: no further output to wait for */
        if (foreign.readableEnded === true) {
            try {
                foreign.end();
                callback();
            }
            catch (err) {
                callback(err);
            }
            return;
        }
        let settled = false;
        const settle = (err) => {
            if (settled)
                return;
            settled = true;
            foreign.removeListener("end", onDone);
            foreign.removeListener("close", onDone);
            foreign.removeListener("error", onDone);
            callback(err);
        };
        /* on "error" we only unblock the flush: the error itself was already
           forwarded by the listener installed in the constructor */
        const onDone = () => { settle(); };
        foreign.once("end", onDone);
        foreign.once("close", onDone);
        foreign.once("error", onDone);
        try {
            foreign.end();
        }
        catch (err) {
            settle(err);
        }
    }
}
|
|
92
|
+
/* SpeechFlow node for WAV format conversion */
|
|
93
|
+
class SpeechFlowNodeWAV extends speechflow_node_1.default {
    /* official name under which this node is registered */
    static name = "wav";
    /* construct node */
    constructor(id, cfg, opts, args) {
        super(id, cfg, opts, args);
        /* one parameter selecting the conversion direction */
        this.configure({
            mode: { type: "string", pos: 1, val: "encode", match: /^(?:encode|decode)$/ }
        });
        /* audio goes in, audio comes out */
        this.input = "audio";
        this.output = "audio";
    }
    /* open node: build the conversion stream for the configured mode */
    async open() {
        const { mode } = this.params;
        if (mode !== "encode" && mode !== "decode")
            throw new Error(`invalid operation mode "${this.params.mode}"`);
        let codec;
        if (mode === "encode") {
            /* raw/PCM -> WAV/PCM
               NOTICE: because this is a continuous stream, the emitted WAV header
               cannot carry a real duration (it is zero) and hence is not fully
               standard-conforming. This is inherent to stream-based processing. */
            codec = new wav_1.default.Writer({
                format: 0x0001 /* PCM */,
                channels: this.config.audioChannels,
                sampleRate: this.config.audioSampleRate,
                bitDepth: this.config.audioBitDepth
            });
        }
        else {
            /* WAV/PCM -> raw/PCM */
            codec = new wav_1.default.Reader();
            /* ordered list of constraints the WAV header has to satisfy */
            const requirements = [
                [(f) => f.audioFormat === 0x0001 /* PCM */, "WAV not based on PCM format"],
                [(f) => f.bitDepth === 16, "WAV not based on 16 bit samples"],
                [(f) => Boolean(f.signed), "WAV not based on signed integers"],
                [(f) => f.endianness === "LE", "WAV not based on little endianness"],
                [(f) => f.sampleRate === 48000, "WAV not based on 48Khz sample rate"],
                [(f) => f.channels === 1, "WAV not based on mono channel"]
            ];
            codec.on("format", (format) => {
                /* report the detected format first, then validate it */
                const formatName = format.audioFormat === 0x0001
                    ? "PCM"
                    : "0x" + format.audioFormat.toString(16).padStart(4, "0");
                const details = [
                    `format=${formatName}`,
                    `bitDepth=${format.bitDepth}`,
                    `signed=${format.signed ? "yes" : "no"}`,
                    `endian=${format.endianness}`,
                    `sampleRate=${format.sampleRate}`,
                    `channels=${format.channels}`
                ];
                this.log("info", `WAV audio stream: ${details.join(" ")}`);
                for (const [satisfied, message] of requirements)
                    if (!satisfied(format))
                        throw new Error(message);
            });
        }
        this.stream = new StreamWrapper(codec);
        /* sandwich the codec between the adapters converting from/to chunk objects */
        const ingress = utils.createTransformStreamForWritableSide();
        const egress = utils.createTransformStreamForReadableSide("audio", () => this.timeZero);
        this.stream = node_stream_1.default.compose(ingress, this.stream, egress);
    }
    /* close node: gracefully end and then destroy the stream */
    async close() {
        if (this.stream === null)
            return;
        await (this.stream instanceof node_stream_1.default.Duplex
            ? new Promise((resolve) => { this.stream.end(() => { resolve(); }); })
            : Promise.resolve());
        this.stream.destroy();
        this.stream = null;
    }
}
|
|
170
|
+
exports.default = SpeechFlowNodeWAV;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import SpeechFlowNode from "./speechflow-node";
|
|
2
|
+
/**
 * SpeechFlow node for Deepgram speech-to-text conversion
 * (declared input is "audio", declared output is "text").
 */
export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
    /** Official node name. */
    static name: string;
    /** Live transcription connection obtained from the Deepgram SDK (null until opened). */
    private dg;
    /**
     * Construct the node and declare its configuration parameters.
     *
     * @param id   node identifier, passed through to the base class
     * @param cfg  configuration map, passed through to the base class
     * @param opts named options, passed through to the base class
     * @param args positional arguments, passed through to the base class
     */
    constructor(id: string, cfg: {
        [id: string]: any;
    }, opts: {
        [id: string]: any;
    }, args: any[]);
    /** Open the node: connect to the Deepgram API and provide the processing stream. */
    open(): Promise<void>;
    /** Close the node: destroy the stream and request closing of the Deepgram connection. */
    close(): Promise<void>;
}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*
|
|
3
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
4
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
5
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
6
|
+
*/
|
|
7
|
+
/* TypeScript interop helper: re-export property "key" of "source" on "target"
   (optionally under the name "alias"), as a live binding where possible */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        /* fall back to a forwarding getter if the property is missing, is an accessor
           on a non-ES module, or is a data property which could still change */
        const needsGetter = !descriptor
            || ("get" in descriptor ? !source.__esModule : (descriptor.writable || descriptor.configurable));
        if (needsGetter)
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        /* legacy engines: plain value copy */
        target[alias === undefined ? key : alias] = source[key];
    });
|
|
18
|
+
/* TypeScript interop helper: attach "value" as the "default" export of "target" */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        /* legacy engines: plain assignment */
        target["default"] = value;
    });
|
|
23
|
+
/* TypeScript interop helper: emulate "import * as ns" for CommonJS modules */
var __importStar = (this && this.__importStar) || (function () {
    /* lazily select the key enumeration strategy on first use */
    let listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (candidate) {
            const names = [];
            for (const name in candidate)
                if (Object.prototype.hasOwnProperty.call(candidate, name))
                    names.push(name);
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        /* genuine ES modules are passed through untouched */
        if (mod && mod.__esModule)
            return mod;
        /* otherwise build a namespace object: all own keys except "default"
           are re-exported, and the module itself becomes the default export */
        const namespace = {};
        if (mod != null) {
            for (const key of listOwnKeys(mod))
                if (key !== "default")
                    __createBinding(namespace, mod, key);
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
40
|
+
/* TypeScript interop helper: emulate "import x from" for CommonJS modules */
var __importDefault = (this && this.__importDefault) || function (mod) {
    /* ES modules already carry their own "default" export */
    if (mod && mod.__esModule)
        return mod;
    return { "default": mod };
};
|
|
43
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
|
+
/* standard dependencies */
|
|
45
|
+
const node_stream_1 = __importDefault(require("node:stream"));
|
|
46
|
+
/* external dependencies */
|
|
47
|
+
const Deepgram = __importStar(require("@deepgram/sdk"));
|
|
48
|
+
const luxon_1 = require("luxon");
|
|
49
|
+
/* internal dependencies */
|
|
50
|
+
const speechflow_node_1 = __importStar(require("./speechflow-node"));
|
|
51
|
+
const utils = __importStar(require("./speechflow-utils"));
|
|
52
|
+
/* SpeechFlow node for Deepgram speech-to-text conversion */
|
|
53
|
+
class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
    /* declare official node name */
    static name = "deepgram";
    /* internal state */
    dg = null;          /* Deepgram live transcription connection */
    initTimeout = null; /* handle of the pending initialization watchdog timer */
    /* construct node */
    constructor(id, cfg, opts, args) {
        super(id, cfg, opts, args);
        /* declare node configuration parameters */
        this.configure({
            key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
            model: { type: "string", val: "nova-3", pos: 0 },
            version: { type: "string", val: "latest", pos: 1 },
            language: { type: "string", val: "multi", pos: 2 }
        });
        /* declare node input/output format */
        this.input = "audio";
        this.output = "text";
    }
    /* open node: connect to Deepgram and provide a Duplex stream which
       consumes audio chunks and produces text chunks */
    async open() {
        /* sanity check situation */
        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
            throw new Error("Deepgram node currently supports PCM-S16LE audio only");
        /* create queue for results */
        const queue = new utils.SingleQueue();
        /* workaround Deepgram initialization problems:
           a watchdog is armed with the first audio data sent and disarmed as soon as
           Deepgram answers; if it expires, the service usage is restarted.
           NOTICE: the timer handle has to be stored, else the watchdog callback
           always bails out early and the restart never happens. */
        let initDone = false;
        const initTimeoutStart = () => {
            /* arm only once: not after initialization and not while already pending */
            if (initDone || this.initTimeout !== null)
                return;
            this.initTimeout = setTimeout(async () => {
                if (this.initTimeout === null)
                    return;
                this.initTimeout = null;
                this.log("warning", "Deepgram: initialization timeout -- restarting service usage");
                try {
                    await this.close();
                    await this.open();
                }
                catch (err) {
                    /* do not leave a floating rejected promise behind */
                    this.log("error", `Deepgram: restart failed: ${err instanceof Error ? err.message : String(err)}`);
                }
            }, 3000);
        };
        const initTimeoutStop = () => {
            if (initDone)
                return;
            initDone = true;
            if (this.initTimeout !== null) {
                clearTimeout(this.initTimeout);
                this.initTimeout = null;
            }
        };
        /* connect to Deepgram API */
        const deepgram = Deepgram.createClient(this.params.key);
        let language = "en";
        if (this.params.model.match(/^nova-2/) && this.params.language !== "en")
            language = this.params.language;
        else if (this.params.model.match(/^nova-3/) && this.params.language !== "en")
            language = "multi";
        this.dg = deepgram.listen.live({
            mip_opt_out: true,
            model: this.params.model,
            version: this.params.version,
            language,
            channels: this.config.audioChannels,
            sample_rate: this.config.audioSampleRate,
            encoding: "linear16",
            multichannel: false,
            endpointing: 10,
            interim_results: false,
            smart_format: true,
            punctuate: true,
            filler_words: true,
            diarize: true, /* still not used by us */
            numerals: true,
            profanity_filter: false
        });
        /* hook onto Deepgram API events */
        this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
            /* NOTE(review): any transcript (even an empty one) is treated as proof that
               the service is initialized, so initial silence does not trigger a restart */
            initTimeoutStop();
            /* be defensive: "alternatives" can be missing or empty */
            const text = data.channel?.alternatives?.[0]?.transcript ?? "";
            if (text === "")
                this.log("info", `Deepgram: empty/dummy text received (start: ${data.start}s, duration: ${data.duration}s)`);
            else {
                this.log("info", `Deepgram: text received (start: ${data.start}s, duration: ${data.duration}s): "${text}"`);
                const start = luxon_1.Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset);
                const end = start.plus({ seconds: data.duration });
                const chunk = new speechflow_node_1.SpeechFlowChunk(start, end, "final", "text", text);
                queue.write(chunk);
            }
        });
        this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
            this.log("info", "Deepgram: metadata received");
        });
        this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
            this.log("info", "Deepgram: connection close");
        });
        this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error) => {
            this.log("error", `Deepgram: ${error.message}`);
            /* pass the error object along, so listeners can inspect it */
            this.emit("error", error);
        });
        /* wait for Deepgram API to be available */
        await new Promise((resolve, reject) => {
            let timer = setTimeout(() => {
                if (timer !== null) {
                    timer = null;
                    reject(new Error("Deepgram: timeout waiting for connection open"));
                }
            }, 3000);
            this.dg.once(Deepgram.LiveTranscriptionEvents.Open, () => {
                this.log("info", "Deepgram: connection open");
                if (timer !== null) {
                    clearTimeout(timer);
                    timer = null;
                }
                resolve(true);
            });
        });
        /* remember opening time to receive time zero offset */
        this.timeOpen = luxon_1.DateTime.now();
        /* provide Duplex stream and internally attach to Deepgram API */
        const dg = this.dg;
        const log = (level, msg) => {
            this.log(level, msg);
        };
        const encoding = this.config.textEncoding;
        this.stream = new node_stream_1.default.Duplex({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            /* writable side: forward audio payloads to Deepgram */
            write(chunk, encoding, callback) {
                if (chunk.type !== "audio")
                    callback(new Error("expected audio input chunk"));
                else if (!Buffer.isBuffer(chunk.payload))
                    callback(new Error("expected Buffer input chunk"));
                else {
                    if (chunk.payload.byteLength > 0) {
                        log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`);
                        initTimeoutStart();
                        dg.send(chunk.payload); /* intentionally discard all time information */
                    }
                    callback();
                }
            },
            /* readable side: deliver the next queued text chunk */
            read(size) {
                queue.read().then((chunk) => {
                    log("info", `Deepgram: receive data (${chunk.payload.length} bytes)`);
                    initTimeoutStop();
                    this.push(chunk, encoding);
                }).catch((err) => {
                    log("error", `Deepgram: failed to receive data: ${err instanceof Error ? err.message : String(err)}`);
                });
            },
            /* end of input: ask Deepgram to close and signal end of output */
            final(callback) {
                dg.requestClose();
                this.push(null);
                callback();
            }
        });
    }
    /* close node */
    async close() {
        /* cancel a still pending initialization watchdog,
           else it would re-open the node after it was closed */
        if (this.initTimeout !== null) {
            clearTimeout(this.initTimeout);
            this.initTimeout = null;
        }
        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy();
            this.stream = null;
        }
        /* shutdown Deepgram API */
        if (this.dg !== null) {
            this.dg.requestClose();
            this.dg = null;
        }
    }
}
|
|
220
|
+
exports.default = SpeechFlowNodeDeepgram;
|
|
@@ -2,7 +2,9 @@ import SpeechFlowNode from "./speechflow-node";
|
|
|
2
2
|
export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
3
3
|
static name: string;
|
|
4
4
|
private dg;
|
|
5
|
-
constructor(id: string,
|
|
5
|
+
constructor(id: string, cfg: {
|
|
6
|
+
[id: string]: any;
|
|
7
|
+
}, opts: {
|
|
6
8
|
[id: string]: any;
|
|
7
9
|
}, args: any[]);
|
|
8
10
|
open(): Promise<void>;
|
|
@@ -42,12 +42,13 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
42
42
|
};
|
|
43
43
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
44
|
/* standard dependencies */
|
|
45
|
-
const node_events_1 = require("node:events");
|
|
46
|
-
/* external dependencies */
|
|
47
45
|
const node_stream_1 = __importDefault(require("node:stream"));
|
|
46
|
+
/* external dependencies */
|
|
48
47
|
const Deepgram = __importStar(require("@deepgram/sdk"));
|
|
48
|
+
const luxon_1 = require("luxon");
|
|
49
49
|
/* internal dependencies */
|
|
50
|
-
const speechflow_node_1 =
|
|
50
|
+
const speechflow_node_1 = __importStar(require("./speechflow-node"));
|
|
51
|
+
const utils = __importStar(require("./speechflow-utils"));
|
|
51
52
|
/* SpeechFlow node for Deepgram speech-to-text conversion */
|
|
52
53
|
class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
|
|
53
54
|
/* declare official node name */
|
|
@@ -55,8 +56,8 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
|
|
|
55
56
|
/* internal state */
|
|
56
57
|
dg = null;
|
|
57
58
|
/* construct node */
|
|
58
|
-
constructor(id, opts, args) {
|
|
59
|
-
super(id, opts, args);
|
|
59
|
+
constructor(id, cfg, opts, args) {
|
|
60
|
+
super(id, cfg, opts, args);
|
|
60
61
|
/* declare node configuration parameters */
|
|
61
62
|
this.configure({
|
|
62
63
|
key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
|
|
@@ -74,13 +75,19 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
|
|
|
74
75
|
if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
|
|
75
76
|
throw new Error("Deepgram node currently supports PCM-S16LE audio only");
|
|
76
77
|
/* create queue for results */
|
|
77
|
-
const queue = new
|
|
78
|
+
const queue = new utils.SingleQueue();
|
|
78
79
|
/* connect to Deepgram API */
|
|
79
80
|
const deepgram = Deepgram.createClient(this.params.key);
|
|
81
|
+
let language = "en";
|
|
82
|
+
if (this.params.model.match(/^nova-2/) && this.params.language !== "en")
|
|
83
|
+
language = this.params.language;
|
|
84
|
+
else if (this.params.model.match(/^nova-3/) && this.params.language !== "en")
|
|
85
|
+
language = "multi";
|
|
80
86
|
this.dg = deepgram.listen.live({
|
|
87
|
+
mip_opt_out: true,
|
|
81
88
|
model: this.params.model,
|
|
82
89
|
version: this.params.version,
|
|
83
|
-
language
|
|
90
|
+
language,
|
|
84
91
|
channels: this.config.audioChannels,
|
|
85
92
|
sample_rate: this.config.audioSampleRate,
|
|
86
93
|
encoding: "linear16",
|
|
@@ -90,18 +97,22 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
|
|
|
90
97
|
smart_format: true,
|
|
91
98
|
punctuate: true,
|
|
92
99
|
filler_words: true,
|
|
93
|
-
diarize: true,
|
|
100
|
+
diarize: true, /* still not used by us */
|
|
94
101
|
numerals: true,
|
|
95
|
-
|
|
96
|
-
profanity_filter: true,
|
|
97
|
-
utterances: false
|
|
102
|
+
profanity_filter: false
|
|
98
103
|
});
|
|
99
104
|
/* hook onto Deepgram API events */
|
|
100
105
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
|
|
101
106
|
const text = data.channel?.alternatives[0].transcript ?? "";
|
|
102
107
|
if (text === "")
|
|
103
|
-
|
|
104
|
-
|
|
108
|
+
this.log("info", `Deepgram: empty/dummy text received (start: ${data.start}s, duration: ${data.duration}s)`);
|
|
109
|
+
else {
|
|
110
|
+
this.log("info", `Deepgram: text received (start: ${data.start}s, duration: ${data.duration}s): "${text}"`);
|
|
111
|
+
const start = luxon_1.Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset);
|
|
112
|
+
const end = start.plus({ seconds: data.duration });
|
|
113
|
+
const chunk = new speechflow_node_1.SpeechFlowChunk(start, end, "final", "text", text);
|
|
114
|
+
queue.write(chunk);
|
|
115
|
+
}
|
|
105
116
|
});
|
|
106
117
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
|
|
107
118
|
this.log("info", "Deepgram: metadata received");
|
|
@@ -114,30 +125,83 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
|
|
|
114
125
|
this.emit("error");
|
|
115
126
|
});
|
|
116
127
|
/* wait for Deepgram API to be available */
|
|
117
|
-
await new Promise((resolve) => {
|
|
128
|
+
await new Promise((resolve, reject) => {
|
|
129
|
+
let timer = setTimeout(() => {
|
|
130
|
+
if (timer !== null) {
|
|
131
|
+
timer = null;
|
|
132
|
+
reject(new Error("Deepgram: timeout waiting for connection open"));
|
|
133
|
+
}
|
|
134
|
+
}, 3000);
|
|
118
135
|
this.dg.once(Deepgram.LiveTranscriptionEvents.Open, () => {
|
|
119
136
|
this.log("info", "Deepgram: connection open");
|
|
137
|
+
if (timer !== null) {
|
|
138
|
+
clearTimeout(timer);
|
|
139
|
+
timer = null;
|
|
140
|
+
}
|
|
120
141
|
resolve(true);
|
|
121
142
|
});
|
|
122
143
|
});
|
|
144
|
+
/* remember opening time to receive time zero offset */
|
|
145
|
+
this.timeOpen = luxon_1.DateTime.now();
|
|
146
|
+
/* workaround Deepgram initialization problems */
|
|
147
|
+
let initDone = false;
|
|
148
|
+
let initTimeout = null;
|
|
149
|
+
const initTimeoutStart = () => {
|
|
150
|
+
if (initDone)
|
|
151
|
+
return;
|
|
152
|
+
setTimeout(async () => {
|
|
153
|
+
if (initTimeout === null)
|
|
154
|
+
return;
|
|
155
|
+
initTimeout = null;
|
|
156
|
+
this.log("warning", "Deepgram: initialization timeout -- restarting service usage");
|
|
157
|
+
await this.close();
|
|
158
|
+
this.open();
|
|
159
|
+
}, 3000);
|
|
160
|
+
};
|
|
161
|
+
const initTimeoutStop = () => {
|
|
162
|
+
if (initDone)
|
|
163
|
+
return;
|
|
164
|
+
initDone = true;
|
|
165
|
+
if (initTimeout !== null) {
|
|
166
|
+
clearTimeout(initTimeout);
|
|
167
|
+
initTimeout = null;
|
|
168
|
+
}
|
|
169
|
+
};
|
|
123
170
|
/* provide Duplex stream and internally attach to Deepgram API */
|
|
124
171
|
const dg = this.dg;
|
|
172
|
+
const log = (level, msg) => {
|
|
173
|
+
this.log(level, msg);
|
|
174
|
+
};
|
|
175
|
+
const encoding = this.config.textEncoding;
|
|
125
176
|
this.stream = new node_stream_1.default.Duplex({
|
|
177
|
+
writableObjectMode: true,
|
|
178
|
+
readableObjectMode: true,
|
|
179
|
+
decodeStrings: false,
|
|
126
180
|
write(chunk, encoding, callback) {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
181
|
+
if (chunk.type !== "audio")
|
|
182
|
+
callback(new Error("expected audio input chunk"));
|
|
183
|
+
else if (!Buffer.isBuffer(chunk.payload))
|
|
184
|
+
callback(new Error("expected Buffer input chunk"));
|
|
185
|
+
else {
|
|
186
|
+
if (chunk.payload.byteLength > 0) {
|
|
187
|
+
log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`);
|
|
188
|
+
initTimeoutStart();
|
|
189
|
+
dg.send(chunk.payload); /* intentionally discard all time information */
|
|
190
|
+
}
|
|
191
|
+
callback();
|
|
192
|
+
}
|
|
133
193
|
},
|
|
134
194
|
read(size) {
|
|
135
|
-
queue.
|
|
136
|
-
|
|
195
|
+
queue.read().then((chunk) => {
|
|
196
|
+
log("info", `Deepgram: receive data (${chunk.payload.length} bytes)`);
|
|
197
|
+
initTimeoutStop();
|
|
198
|
+
this.push(chunk, encoding);
|
|
137
199
|
});
|
|
138
200
|
},
|
|
139
201
|
final(callback) {
|
|
140
202
|
dg.requestClose();
|
|
203
|
+
this.push(null);
|
|
204
|
+
callback();
|
|
141
205
|
}
|
|
142
206
|
});
|
|
143
207
|
}
|
|
@@ -2,7 +2,9 @@ import SpeechFlowNode from "./speechflow-node";
|
|
|
2
2
|
export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
|
|
3
3
|
static name: string;
|
|
4
4
|
private deepl;
|
|
5
|
-
constructor(id: string,
|
|
5
|
+
constructor(id: string, cfg: {
|
|
6
|
+
[id: string]: any;
|
|
7
|
+
}, opts: {
|
|
6
8
|
[id: string]: any;
|
|
7
9
|
}, args: any[]);
|
|
8
10
|
open(): Promise<void>;
|
|
@@ -43,7 +43,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
43
43
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
44
|
/* standard dependencies */
|
|
45
45
|
const node_stream_1 = __importDefault(require("node:stream"));
|
|
46
|
-
const node_events_1 = require("node:events");
|
|
47
46
|
/* external dependencies */
|
|
48
47
|
const DeepL = __importStar(require("deepl-node"));
|
|
49
48
|
/* internal dependencies */
|
|
@@ -55,8 +54,8 @@ class SpeechFlowNodeDeepL extends speechflow_node_1.default {
|
|
|
55
54
|
/* internal state */
|
|
56
55
|
deepl = null;
|
|
57
56
|
/* construct node */
|
|
58
|
-
constructor(id, opts, args) {
|
|
59
|
-
super(id, opts, args);
|
|
57
|
+
constructor(id, cfg, opts, args) {
|
|
58
|
+
super(id, cfg, opts, args);
|
|
60
59
|
/* declare node configuration parameters */
|
|
61
60
|
this.configure({
|
|
62
61
|
key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPL },
|
|
@@ -84,27 +83,33 @@ class SpeechFlowNodeDeepL extends speechflow_node_1.default {
|
|
|
84
83
|
return (result?.text ?? text);
|
|
85
84
|
};
|
|
86
85
|
/* establish a duplex stream and connect it to DeepL translation */
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
callback();
|
|
94
|
-
}
|
|
86
|
+
this.stream = new node_stream_1.default.Transform({
|
|
87
|
+
readableObjectMode: true,
|
|
88
|
+
writableObjectMode: true,
|
|
89
|
+
decodeStrings: false,
|
|
90
|
+
transform(chunk, encoding, callback) {
|
|
91
|
+
if (Buffer.isBuffer(chunk.payload))
|
|
92
|
+
callback(new Error("invalid chunk payload type"));
|
|
95
93
|
else {
|
|
96
|
-
|
|
97
|
-
|
|
94
|
+
if (chunk.payload === "") {
|
|
95
|
+
this.push(chunk);
|
|
98
96
|
callback();
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
translate(chunk.payload).then((payload) => {
|
|
100
|
+
const chunkNew = chunk.clone();
|
|
101
|
+
chunkNew.payload = payload;
|
|
102
|
+
this.push(chunkNew);
|
|
103
|
+
callback();
|
|
104
|
+
}).catch((err) => {
|
|
105
|
+
callback(err);
|
|
106
|
+
});
|
|
107
|
+
}
|
|
102
108
|
}
|
|
103
109
|
},
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
});
|
|
110
|
+
final(callback) {
|
|
111
|
+
this.push(null);
|
|
112
|
+
callback();
|
|
108
113
|
}
|
|
109
114
|
});
|
|
110
115
|
}
|