speechflow 0.9.4 → 0.9.7
This diff covers publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +227 -54
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
- package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
- package/dst/speechflow-node-a2a-wav.d.ts +11 -0
- package/dst/speechflow-node-a2a-wav.js +170 -0
- package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
- package/dst/speechflow-node-a2t-deepgram.js +220 -0
- package/dst/speechflow-node-deepgram.d.ts +3 -1
- package/dst/speechflow-node-deepgram.js +86 -22
- package/dst/speechflow-node-deepl.d.ts +3 -1
- package/dst/speechflow-node-deepl.js +25 -20
- package/dst/speechflow-node-device.d.ts +3 -1
- package/dst/speechflow-node-device.js +53 -2
- package/dst/speechflow-node-elevenlabs.d.ts +4 -1
- package/dst/speechflow-node-elevenlabs.js +88 -49
- package/dst/speechflow-node-ffmpeg.d.ts +3 -1
- package/dst/speechflow-node-ffmpeg.js +42 -4
- package/dst/speechflow-node-file.d.ts +3 -1
- package/dst/speechflow-node-file.js +84 -13
- package/dst/speechflow-node-format.d.ts +11 -0
- package/dst/speechflow-node-format.js +80 -0
- package/dst/speechflow-node-gemma.d.ts +3 -1
- package/dst/speechflow-node-gemma.js +84 -23
- package/dst/speechflow-node-mqtt.d.ts +13 -0
- package/dst/speechflow-node-mqtt.js +181 -0
- package/dst/speechflow-node-opus.d.ts +12 -0
- package/dst/speechflow-node-opus.js +135 -0
- package/dst/speechflow-node-subtitle.d.ts +12 -0
- package/dst/speechflow-node-subtitle.js +96 -0
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
- package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
- package/dst/speechflow-node-t2t-deepl.js +133 -0
- package/dst/speechflow-node-t2t-format.d.ts +11 -0
- package/dst/speechflow-node-t2t-format.js +80 -0
- package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
- package/dst/speechflow-node-t2t-gemma.js +213 -0
- package/dst/speechflow-node-t2t-opus.d.ts +12 -0
- package/dst/speechflow-node-t2t-opus.js +135 -0
- package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
- package/dst/speechflow-node-t2t-subtitle.js +96 -0
- package/dst/speechflow-node-trace.d.ts +11 -0
- package/dst/speechflow-node-trace.js +88 -0
- package/dst/speechflow-node-wav.d.ts +11 -0
- package/dst/speechflow-node-wav.js +170 -0
- package/dst/speechflow-node-websocket.d.ts +3 -1
- package/dst/speechflow-node-websocket.js +149 -49
- package/dst/speechflow-node-whisper-common.d.ts +34 -0
- package/dst/speechflow-node-whisper-common.js +7 -0
- package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-ggml.js +97 -0
- package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
- package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker.js +116 -0
- package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker2.js +82 -0
- package/dst/speechflow-node-whisper.d.ts +19 -0
- package/dst/speechflow-node-whisper.js +604 -0
- package/dst/speechflow-node-x2x-trace.d.ts +11 -0
- package/dst/speechflow-node-x2x-trace.js +88 -0
- package/dst/speechflow-node-xio-device.d.ts +13 -0
- package/dst/speechflow-node-xio-device.js +205 -0
- package/dst/speechflow-node-xio-file.d.ts +11 -0
- package/dst/speechflow-node-xio-file.js +176 -0
- package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
- package/dst/speechflow-node-xio-mqtt.js +181 -0
- package/dst/speechflow-node-xio-websocket.d.ts +13 -0
- package/dst/speechflow-node-xio-websocket.js +275 -0
- package/dst/speechflow-node.d.ts +25 -7
- package/dst/speechflow-node.js +74 -9
- package/dst/speechflow-utils.d.ts +23 -0
- package/dst/speechflow-utils.js +194 -0
- package/dst/speechflow.js +146 -43
- package/etc/biome.jsonc +12 -4
- package/etc/stx.conf +65 -0
- package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
- package/package.json +49 -31
- package/sample.yaml +61 -23
- package/src/lib.d.ts +6 -1
- package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
- package/src/speechflow-node-a2a-wav.ts +143 -0
- package/src/speechflow-node-a2t-deepgram.ts +199 -0
- package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
- package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
- package/src/speechflow-node-t2t-format.ts +85 -0
- package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
- package/src/speechflow-node-t2t-opus.ts +111 -0
- package/src/speechflow-node-t2t-subtitle.ts +101 -0
- package/src/speechflow-node-x2x-trace.ts +92 -0
- package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
- package/src/speechflow-node-xio-file.ts +153 -0
- package/src/speechflow-node-xio-mqtt.ts +154 -0
- package/src/speechflow-node-xio-websocket.ts +248 -0
- package/src/speechflow-node.ts +78 -13
- package/src/speechflow-utils.ts +212 -0
- package/src/speechflow.ts +150 -43
- package/etc/nps.yaml +0 -40
- package/src/speechflow-node-deepgram.ts +0 -133
- package/src/speechflow-node-elevenlabs.ts +0 -116
- package/src/speechflow-node-file.ts +0 -108
- package/src/speechflow-node-websocket.ts +0 -179
package/dst/speechflow-utils.js
ADDED

@@ -0,0 +1,194 @@
+ "use strict";
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.DoubleQueue = exports.SingleQueue = void 0;
+ exports.audioBufferDuration = audioBufferDuration;
+ exports.createTransformStreamForWritableSide = createTransformStreamForWritableSide;
+ exports.createTransformStreamForReadableSide = createTransformStreamForReadableSide;
+ exports.ensureStreamChunk = ensureStreamChunk;
+ exports.streamChunkEncode = streamChunkEncode;
+ exports.streamChunkDecode = streamChunkDecode;
+ /* external dependencies */
+ const node_stream_1 = __importDefault(require("node:stream"));
+ const node_events_1 = require("node:events");
+ const luxon_1 = require("luxon");
+ const cbor2_1 = __importDefault(require("cbor2"));
+ /* internal dependencies */
+ const speechflow_node_1 = require("./speechflow-node");
+ /* calculate duration of an audio buffer */
+ function audioBufferDuration(buffer, sampleRate = 48000, bitDepth = 16, channels = 1, littleEndian = true) {
+     if (!Buffer.isBuffer(buffer))
+         throw new Error("invalid input (Buffer expected)");
+     if (littleEndian !== true)
+         throw new Error("only Little Endian supported");
+     const bytesPerSample = bitDepth / 8;
+     const totalSamples = buffer.length / (bytesPerSample * channels);
+     return totalSamples / sampleRate;
+ }
+ /* create a Duplex/Transform stream which has
+    object-mode on Writable side and buffer/string-mode on Readable side */
+ function createTransformStreamForWritableSide() {
+     return new node_stream_1.default.Transform({
+         readableObjectMode: true,
+         writableObjectMode: true,
+         decodeStrings: false,
+         transform(chunk, encoding, callback) {
+             this.push(chunk.payload);
+             callback();
+         }
+     });
+ }
+ /* create a Duplex/Transform stream which has
+    object-mode on Readable side and buffer/string-mode on Writable side */
+ function createTransformStreamForReadableSide(type, getTimeZero) {
+     return new node_stream_1.default.Transform({
+         readableObjectMode: true,
+         writableObjectMode: true,
+         decodeStrings: false,
+         transform(chunk, encoding, callback) {
+             const timeZero = getTimeZero();
+             const start = luxon_1.DateTime.now().diff(timeZero);
+             let end = start;
+             if (type === "audio") {
+                 const duration = audioBufferDuration(chunk);
+                 end = start.plus(duration * 1000);
+             }
+             const obj = new speechflow_node_1.SpeechFlowChunk(start, end, "final", type, chunk);
+             this.push(obj);
+             callback();
+         }
+     });
+ }
+ /* ensure a chunk is of a certain type and format */
+ function ensureStreamChunk(type, chunk) {
+     if (chunk instanceof speechflow_node_1.SpeechFlowChunk) {
+         if (chunk.type !== type)
+             throw new Error(`invalid payload chunk (expected ${type} type, received ${chunk.type} type)`);
+     }
+     else {
+         if (type === "text" && Buffer.isBuffer(chunk))
+             chunk = chunk.toString("utf8");
+         else if (type === "audio" && !Buffer.isBuffer(chunk))
+             chunk = Buffer.from(chunk);
+     }
+     return chunk;
+ }
+ /* encode/serialize chunk of data */
+ function streamChunkEncode(chunk) {
+     let payload;
+     if (Buffer.isBuffer(chunk.payload))
+         payload = new Uint8Array(chunk.payload);
+     else {
+         const encoder = new TextEncoder();
+         payload = encoder.encode(chunk.payload);
+     }
+     const data = {
+         timestampStart: chunk.timestampStart.toMillis(),
+         timestampEnd: chunk.timestampEnd.toMillis(),
+         kind: chunk.kind,
+         type: chunk.type,
+         payload
+     };
+     const _data = cbor2_1.default.encode(data);
+     return _data;
+ }
+ /* decode/unserialize chunk of data */
+ function streamChunkDecode(_data) {
+     let data;
+     try {
+         data = cbor2_1.default.decode(_data);
+     }
+     catch (err) {
+         throw new Error(`CBOR decoding failed: ${err}`);
+     }
+     let payload;
+     if (data.type === "audio")
+         payload = Buffer.from(data.payload);
+     else
+         payload = (new TextDecoder()).decode(data.payload);
+     const chunk = new speechflow_node_1.SpeechFlowChunk(luxon_1.Duration.fromMillis(data.timestampStart), luxon_1.Duration.fromMillis(data.timestampEnd), data.kind, data.type, payload);
+     return chunk;
+ }
+ /* helper class for single item queue */
+ class SingleQueue extends node_events_1.EventEmitter {
+     queue = new Array();
+     write(item) {
+         this.queue.unshift(item);
+         this.emit("dequeue");
+     }
+     read() {
+         return new Promise((resolve, reject) => {
+             const consume = () => {
+                 if (this.queue.length > 0)
+                     return this.queue.pop();
+                 else
+                     return null;
+             };
+             let item = consume();
+             if (item !== null)
+                 resolve(item);
+             else {
+                 const tryToConsume = () => {
+                     item = consume();
+                     if (item !== null)
+                         resolve(item);
+                     else
+                         this.once("dequeue", tryToConsume);
+                 };
+                 this.once("dequeue", tryToConsume);
+             }
+         });
+     }
+ }
+ exports.SingleQueue = SingleQueue;
+ /* helper class for double-item queue */
+ class DoubleQueue extends node_events_1.EventEmitter {
+     queue0 = new Array();
+     queue1 = new Array();
+     notify() {
+         if (this.queue0.length > 0 && this.queue1.length > 0)
+             this.emit("dequeue");
+     }
+     write0(item) {
+         this.queue0.unshift(item);
+         this.notify();
+     }
+     write1(item) {
+         this.queue1.unshift(item);
+         this.notify();
+     }
+     read() {
+         return new Promise((resolve, reject) => {
+             const consume = () => {
+                 if (this.queue0.length > 0 && this.queue1.length > 0) {
+                     const item0 = this.queue0.pop();
+                     const item1 = this.queue1.pop();
+                     return [item0, item1];
+                 }
+                 else
+                     return null;
+             };
+             let items = consume();
+             if (items !== null)
+                 resolve(items);
+             else {
+                 const tryToConsume = () => {
+                     items = consume();
+                     if (items !== null)
+                         resolve(items);
+                     else
+                         this.once("dequeue", tryToConsume);
+                 };
+                 this.once("dequeue", tryToConsume);
+             }
+         });
+     }
+ }
+ exports.DoubleQueue = DoubleQueue;
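Taken together, the new speechflow-utils.js module gives the nodes a small shared toolkit: PCM duration arithmetic, object-mode/byte-mode stream adapters, CBOR-based wire (de)serialization of SpeechFlowChunk objects, and awaitable queues. A minimal usage sketch of the exported pieces (values and require paths are illustrative only; the SpeechFlowChunk constructor arguments follow the order visible in streamChunkDecode above):

    const { Duration } = require("luxon")
    const { SpeechFlowChunk } = require("./dst/speechflow-node")
    const utils = require("./dst/speechflow-utils")

    /* 96000 bytes of 48 kHz / 16 bit / mono PCM:
       96000 / (2 bytes x 1 channel) = 48000 samples, 48000 / 48000 Hz = 1.0 s */
    const pcm = Buffer.alloc(96000)
    console.log(utils.audioBufferDuration(pcm))   /* -> 1 */

    /* round-trip a text chunk through the CBOR wire format */
    const chunk = new SpeechFlowChunk(
        Duration.fromMillis(0), Duration.fromMillis(500),
        "final", "text", "hello")
    const wire = utils.streamChunkEncode(chunk)   /* Uint8Array with CBOR data */
    const back = utils.streamChunkDecode(wire)
    console.log(back.payload)                     /* -> "hello" */

    /* SingleQueue: read() returns a Promise which resolves
       as soon as write() has delivered an item */
    const queue = new utils.SingleQueue()
    queue.read().then((item) => { console.log(item) })  /* -> "sample" */
    queue.write("sample")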
package/dst/speechflow.js
CHANGED

@@ -10,8 +10,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  };
  Object.defineProperty(exports, "__esModule", { value: true });
  /* standard dependencies */
+ const node_path_1 = __importDefault(require("node:path"));
  const node_stream_1 = __importDefault(require("node:stream"));
+ const node_events_1 = require("node:events");
  /* external dependencies */
+ const luxon_1 = require("luxon");
  const cli_io_1 = __importDefault(require("cli-io"));
  const yargs_1 = __importDefault(require("yargs"));
  const js_yaml_1 = __importDefault(require("js-yaml"));

@@ -19,10 +22,16 @@ const flowlink_1 = __importDefault(require("flowlink"));
  const object_path_1 = __importDefault(require("object-path"));
  const installed_packages_1 = __importDefault(require("installed-packages"));
  const dotenvx_1 = __importDefault(require("@dotenvx/dotenvx"));
+ const syspath_1 = __importDefault(require("syspath"));
  const package_json_1 = __importDefault(require("../package.json"));
  /* central CLI context */
  let cli = null;
  (async () => {
+     /* determine system paths */
+     const { dataDir } = (0, syspath_1.default)({
+         appName: "speechflow",
+         dataDirAutoCreate: true
+     });
      /* parse command-line arguments */
      const args = await (0, yargs_1.default)()
          /* eslint @stylistic/indent: off */

@@ -30,9 +39,10 @@ let cli = null;
          "[-h|--help] " +
          "[-V|--version] " +
          "[-v|--verbose <level>] " +
+         "[-C|--cache <directory>] " +
          "[-e|--expression <expression>] " +
-         "[-f|--
-         "[-c|--config <
+         "[-f|--file <file>] " +
+         "[-c|--config <id>@<yaml-config-file>] " +
          "[<argument> [...]]")
          .help("h").alias("h", "help").default("h", false)
          .describe("h", "show usage help")

@@ -40,12 +50,14 @@ let cli = null;
          .describe("V", "show program version information")
          .string("v").nargs("v", 1).alias("v", "log-level").default("v", "warning")
          .describe("v", "level for verbose logging ('none', 'error', 'warning', 'info', 'debug')")
+         .string("C").nargs("C", 1).alias("C", "cache").default("C", node_path_1.default.join(dataDir, "cache"))
+         .describe("C", "directory for cached files (primarily AI model files)")
          .string("e").nargs("e", 1).alias("e", "expression").default("e", "")
-         .describe("e", "FlowLink expression")
-         .string("f").nargs("f", 1).alias("f", "
+         .describe("e", "FlowLink expression string")
+         .string("f").nargs("f", 1).alias("f", "file").default("f", "")
          .describe("f", "FlowLink expression file")
          .string("c").nargs("c", 1).alias("c", "config-file").default("c", "")
-         .describe("c", "
+         .describe("c", "FlowLink expression reference into YAML file (in format <id>@<file>)")
          .version(false)
          .strict()
          .showHelpOnFail(true)

@@ -117,14 +129,20 @@ let cli = null;
      const nodes = {};
      /* load internal SpeechFlow nodes */
      const pkgsI = [
-         "./speechflow-node-
-         "./speechflow-node-
-         "./speechflow-node-
-         "./speechflow-node-
-         "./speechflow-node-
-         "./speechflow-node-
-         "./speechflow-node-
-         "./speechflow-node-
+         "./speechflow-node-a2a-ffmpeg.js",
+         "./speechflow-node-a2a-wav.js",
+         "./speechflow-node-a2t-deepgram.js",
+         "./speechflow-node-t2a-elevenlabs.js",
+         "./speechflow-node-t2t-deepl.js",
+         "./speechflow-node-t2t-format.js",
+         "./speechflow-node-t2t-gemma.js",
+         "./speechflow-node-t2t-opus.js",
+         "./speechflow-node-t2t-subtitle.js",
+         "./speechflow-node-x2x-trace.js",
+         "./speechflow-node-xio-device.js",
+         "./speechflow-node-xio-file.js",
+         "./speechflow-node-xio-mqtt.js",
+         "./speechflow-node-xio-websocket.js"
      ];
      for (const pkg of pkgsI) {
          let node = await import(pkg);

@@ -162,34 +180,78 @@ let cli = null;
      let nodenum = 1;
      const variables = { argv: args._, env: process.env };
      const graphNodes = new Set();
-     (old lines 165-178: content elided in this diff view)
-     cli.log("
-     (old lines 180-187: content elided in this diff view)
+     const cfg = {
+         audioChannels: 1,
+         audioBitDepth: 16,
+         audioLittleEndian: true,
+         audioSampleRate: 48000,
+         textEncoding: "utf8",
+         cacheDir: args.cache
+     };
+     let ast;
+     try {
+         ast = flowlink.compile(config);
+     }
+     catch (err) {
+         if (err instanceof Error && err.name === "FlowLinkError")
+             cli.log("error", `failed to parse SpeechFlow configuration: ${err.toString()}"`);
+         else if (err instanceof Error)
+             cli.log("error", `failed to parse SpeechFlow configuration: ${err.message}"`);
+         else
+             cli.log("error", "failed to parse SpeechFlow configuration: internal error");
+         process.exit(1);
+     }
+     try {
+         flowlink.execute(ast, {
+             resolveVariable(id) {
+                 if (!object_path_1.default.has(variables, id))
+                     throw new Error(`failed to resolve variable "${id}"`);
+                 const value = object_path_1.default.get(variables, id);
+                 cli.log("info", `resolve variable: "${id}" -> "${value}"`);
+                 return value;
+             },
+             createNode(id, opts, args) {
+                 if (nodes[id] === undefined)
+                     throw new Error(`unknown node "${id}"`);
+                 let node;
+                 try {
+                     node = new nodes[id](`${id}[${nodenum}]`, cfg, opts, args);
+                 }
+                 catch (err) {
+                     /* fatal error */
+                     if (err instanceof Error)
+                         cli.log("error", `creation of "${id}[${nodenum}]" node failed: ${err.message}`);
+                     else
+                         cli.log("error", `creation of "${id}"[${nodenum}] node failed: ${err}`);
+                     process.exit(1);
+                 }
+                 nodenum++;
+                 const params = Object.keys(node.params)
+                     .map((key) => `${key}: ${JSON.stringify(node.params[key])}`).join(", ");
+                 cli.log("info", `create node "${node.id}" (${params})`);
+                 graphNodes.add(node);
+                 return node;
+             },
+             connectNode(node1, node2) {
+                 cli.log("info", `connect node "${node1.id}" to node "${node2.id}"`);
+                 node1.connect(node2);
+             }
+         });
+     }
+     catch (err) {
+         if (err instanceof Error && err.name === "FlowLinkError")
+             cli.log("error", `failed to materialize SpeechFlow configuration: ${err.toString()}"`);
+         else if (err instanceof Error)
+             cli.log("error", `failed to materialize SpeechFlow configuration: ${err.message}"`);
+         else
+             cli.log("error", "failed to materialize SpeechFlow configuration: internal error");
+         process.exit(1);
+     }
      /* graph processing: PASS 2: prune connections of nodes */
      for (const node of graphNodes) {
          /* determine connections */
-         (old lines 191-192: content elided in this diff view)
+         let connectionsIn = Array.from(node.connectionsIn);
+         let connectionsOut = Array.from(node.connectionsOut);
          /* ensure necessary incoming links */
          if (node.input !== "none" && connectionsIn.length === 0)
              throw new Error(`node "${node.id}" requires input but has no input nodes connected`);

@@ -203,6 +265,8 @@ let cli = null;
          if (node.output === "none" && connectionsOut.length > 0)
              connectionsOut.forEach((other) => { node.disconnect(other); });
          /* check for payload compatibility */
+         connectionsIn = Array.from(node.connectionsIn);
+         connectionsOut = Array.from(node.connectionsOut);
          for (const other of connectionsOut)
              if (other.input !== node.output)
                  throw new Error(`${node.output} output node "${node.id}" cannot be ` +

@@ -212,7 +276,7 @@ let cli = null;
      for (const node of graphNodes) {
          /* connect node events */
          node.on("log", (level, msg, data) => {
-             let str =
+             let str = `<${node.id}>: ${msg}`;
              if (data !== undefined)
                  str += ` (${JSON.stringify(data)})`;
              cli.log(level, str);

@@ -224,7 +288,13 @@ let cli = null;
              throw new Error(`failed to open node "${node.id}"`);
          });
      }
-     /* graph processing: PASS 4:
+     /* graph processing: PASS 4: set time zero in all nodes */
+     const timeZero = luxon_1.DateTime.now();
+     for (const node of graphNodes) {
+         cli.log("info", `set time zero in node "${node.id}"`);
+         node.setTimeZero(timeZero);
+     }
+     /* graph processing: PASS 5: connect node streams */
      for (const node of graphNodes) {
          if (node.stream === null)
              throw new Error(`stream of node "${node.id}" still not initialized`);

@@ -241,13 +311,38 @@ let cli = null;
              node.stream.pipe(other.stream);
          }
      }
+     /* graph processing: PASS 6: track stream finishing */
+     const activeNodes = new Set();
+     const finishEvents = new node_events_1.EventEmitter();
+     for (const node of graphNodes) {
+         if (node.stream === null)
+             throw new Error(`stream of node "${node.id}" still not initialized`);
+         cli.log("info", `observe stream of node "${node.id}" for finish event`);
+         activeNodes.add(node);
+         node.stream.on("finish", () => {
+             activeNodes.delete(node);
+             cli.log("info", `stream of node "${node.id}" finished (${activeNodes.size} nodes remaining active)`);
+             if (activeNodes.size === 0) {
+                 const timeFinished = luxon_1.DateTime.now();
+                 const duration = timeFinished.diff(timeZero);
+                 cli.log("info", "everything finished -- stream processing in SpeechFlow graph stops " +
+                     `(total duration: ${duration.toFormat("hh:mm:ss.SSS")})`);
+                 finishEvents.emit("finished");
+             }
+         });
+     }
+     /* start of internal stream processing */
+     cli.log("info", "everything established -- stream processing in SpeechFlow graph starts");
      /* gracefully shutdown process */
      let shuttingDown = false;
      const shutdown = async (signal) => {
          if (shuttingDown)
              return;
          shuttingDown = true;
-         (old line 250: content elided in this diff view)
+         if (signal === "finished")
+             cli.log("info", "streams of all nodes finished -- shutting down service");
+         else
+             cli.log("warning", `received signal ${signal} -- shutting down service`);
          /* graph processing: PASS 1: disconnect node streams */
          for (const node of graphNodes) {
              if (node.stream === null) {

@@ -276,7 +371,9 @@ let cli = null;
          /* graph processing: PASS 2: close nodes */
          for (const node of graphNodes) {
              cli.log("info", `close node "${node.id}"`);
-             await node.close()
+             await node.close().catch((err) => {
+                 cli.log("warning", `node "${node.id}" failed to close: ${err}`);
+             });
          }
          /* graph processing: PASS 3: disconnect nodes */
          for (const node of graphNodes) {

@@ -292,8 +389,14 @@ let cli = null;
              graphNodes.delete(node);
          }
          /* terminate process */
-         (old line 295: content elided in this diff view)
+         if (signal === "finished")
+             process.exit(0);
+         else
+             process.exit(1);
      };
+     finishEvents.on("finished", () => {
+         shutdown("finished");
+     });
      process.on("SIGINT", () => {
          shutdown("SIGINT");
      });
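The notable control-flow addition above is PASS 6: every node stream is observed for its Writable-side "finish" event, the finished node is removed from a set of still-active nodes, and once that set drains, a synthetic "finished" signal drives the same shutdown path as SIGINT, so batch-style graphs (e.g. file-to-file) now terminate on their own. A self-contained sketch of that pattern, using plain PassThrough streams as stand-ins for the node streams:

    const { EventEmitter } = require("node:events")
    const { PassThrough }  = require("node:stream")

    /* stand-ins for the streams of the graph nodes */
    const streams = [ new PassThrough(), new PassThrough() ]

    /* track active streams and emit "finished" once the last one drains */
    const finishEvents   = new EventEmitter()
    const activeStreams  = new Set(streams)
    for (const stream of streams) {
        stream.on("finish", () => {   /* Writable side fully flushed */
            activeStreams.delete(stream)
            console.log(`${activeStreams.size} streams remaining active`)
            if (activeStreams.size === 0)
                finishEvents.emit("finished")
        })
    }
    finishEvents.on("finished", () => { console.log("all finished -- shut down") })

    /* ending both streams fires "finished" exactly once */
    for (const stream of streams)
        stream.end()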
package/etc/biome.jsonc
CHANGED

@@ -5,11 +5,16 @@
  */

  {
+     "root": false,
      "formatter": {
          "enabled": false
      },
-     "
-     "
+     "assist": {
+         "actions": {
+             "source": {
+                 "organizeImports": "off"
+             }
+         }
      },
      "linter": {
          "enabled": true,

@@ -28,8 +33,11 @@
          },
          "suspicious": {
              "noExplicitAny": "off",
-             "noAssignInExpressions": "off"
-             (old line 32: content elided in this diff view)
+             "noAssignInExpressions": "off"
+         },
+         "correctness": {
+             "useValidTypeof": "off",
+             "noUnusedFunctionParameters": "off"
          }
      }
  }
package/etc/stx.conf
ADDED

@@ -0,0 +1,65 @@
+ ##
+ ## SpeechFlow - Speech Processing Flow Graph
+ ## Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ## Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ ##
+
+ # make patches
+ patch-make
+     npm shrinkwrap && \
+     patch-package --patch-dir package.d "@ericedouard/vad-node-realtime" && \
+     shx rm -f npm-shrinkwrap.json
+
+ # [INTERNAL] apply patches
+ patch-apply
+     patch-package --patch-dir package.d
+
+ # multiview-style development dashboard
+ dev
+     stmux -w always -m beep -e "built.in.+ms" -- \
+         [ -s 35% "stx lint-watch" : \
+           -s 15% "stx build-watch" : \
+           -s 30% "stx server-delay server-watch" ]
+
+ # static code analysis (linting)
+ lint:
+     tsc --project etc/tsconfig.json --noEmit && \
+     oxlint --config etc/oxlint.jsonc src/**/*.ts && \
+     biome lint --diagnostic-level=warn --config-path=etc/biome.jsonc src/*.ts && \
+     eslint --config etc/eslint.mjs src/**/*.ts
+
+ # static code analysis (linting) with file watching
+ lint-watch
+     nodemon --exec "stx lint" --watch src --ext ts
+
+ # code compilation/transpiling (building)
+ build
+     tsc --project etc/tsconfig.json && \
+     (echo "#!/usr/bin/env node"; cat dst/speechflow.js) >dst/speechflow.js.new && \
+     mv dst/speechflow.js.new dst/speechflow.js
+
+ # code compilation/transpiling (building) with file watching
+ build-watch
+     nodemon --exec "stx build" --watch src --ext ts
+
+ # [INTERNAL] wait for server
+ server-delay
+     delay 2.0
+
+ # run program
+ server
+     node dst/speechflow.js -v info -c studio@sample.yaml
+
+ # run program with file watching
+ server-watch
+     cross-env NODE_OPTIONS="--enable-source-maps" \
+         nodemon --exec "stx server" --watch dst --ext ts --delay 1.0
+
+ # remove all files regularly generated
+ clean
+     shx rm -rf dst
+
+ # remove all files generated
+ clean-dist : clean
+     shx rm -rf node_modules
+
package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch
ADDED

@@ -0,0 +1,18 @@
+ diff --git a/node_modules/@ericedouard/vad-node-realtime/dist/common/models/v5.js b/node_modules/@ericedouard/vad-node-realtime/dist/common/models/v5.js
+ index 4a75457..e3a4e9a 100644
+ --- a/node_modules/@ericedouard/vad-node-realtime/dist/common/models/v5.js
+ +++ b/node_modules/@ericedouard/vad-node-realtime/dist/common/models/v5.js
+ @@ -39,13 +39,11 @@ class SileroV5 {
+  exports.SileroV5 = SileroV5;
+  _a = SileroV5;
+  SileroV5.new = async (ortInstance, modelFetcher) => {
+ -    logging_1.log.debug("Loading VAD...");
+      const modelArrayBuffer = await modelFetcher();
+      const _session = await ortInstance.InferenceSession.create(modelArrayBuffer);
+      // @ts-ignore
+      const _sr = new ortInstance.Tensor("int64", [16000n]);
+      const _state = getNewState(ortInstance);
+ -    logging_1.log.debug("...finished loading VAD");
+      return new _a(_session, _state, _sr, ortInstance);
+  };
+  //# sourceMappingURL=v5.js.map