speechflow 0.9.4 → 0.9.7
This diff shows the changes between publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- package/CHANGELOG.md +19 -0
- package/README.md +227 -54
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
- package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
- package/dst/speechflow-node-a2a-wav.d.ts +11 -0
- package/dst/speechflow-node-a2a-wav.js +170 -0
- package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
- package/dst/speechflow-node-a2t-deepgram.js +220 -0
- package/dst/speechflow-node-deepgram.d.ts +3 -1
- package/dst/speechflow-node-deepgram.js +86 -22
- package/dst/speechflow-node-deepl.d.ts +3 -1
- package/dst/speechflow-node-deepl.js +25 -20
- package/dst/speechflow-node-device.d.ts +3 -1
- package/dst/speechflow-node-device.js +53 -2
- package/dst/speechflow-node-elevenlabs.d.ts +4 -1
- package/dst/speechflow-node-elevenlabs.js +88 -49
- package/dst/speechflow-node-ffmpeg.d.ts +3 -1
- package/dst/speechflow-node-ffmpeg.js +42 -4
- package/dst/speechflow-node-file.d.ts +3 -1
- package/dst/speechflow-node-file.js +84 -13
- package/dst/speechflow-node-format.d.ts +11 -0
- package/dst/speechflow-node-format.js +80 -0
- package/dst/speechflow-node-gemma.d.ts +3 -1
- package/dst/speechflow-node-gemma.js +84 -23
- package/dst/speechflow-node-mqtt.d.ts +13 -0
- package/dst/speechflow-node-mqtt.js +181 -0
- package/dst/speechflow-node-opus.d.ts +12 -0
- package/dst/speechflow-node-opus.js +135 -0
- package/dst/speechflow-node-subtitle.d.ts +12 -0
- package/dst/speechflow-node-subtitle.js +96 -0
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
- package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
- package/dst/speechflow-node-t2t-deepl.js +133 -0
- package/dst/speechflow-node-t2t-format.d.ts +11 -0
- package/dst/speechflow-node-t2t-format.js +80 -0
- package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
- package/dst/speechflow-node-t2t-gemma.js +213 -0
- package/dst/speechflow-node-t2t-opus.d.ts +12 -0
- package/dst/speechflow-node-t2t-opus.js +135 -0
- package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
- package/dst/speechflow-node-t2t-subtitle.js +96 -0
- package/dst/speechflow-node-trace.d.ts +11 -0
- package/dst/speechflow-node-trace.js +88 -0
- package/dst/speechflow-node-wav.d.ts +11 -0
- package/dst/speechflow-node-wav.js +170 -0
- package/dst/speechflow-node-websocket.d.ts +3 -1
- package/dst/speechflow-node-websocket.js +149 -49
- package/dst/speechflow-node-whisper-common.d.ts +34 -0
- package/dst/speechflow-node-whisper-common.js +7 -0
- package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-ggml.js +97 -0
- package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
- package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker.js +116 -0
- package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker2.js +82 -0
- package/dst/speechflow-node-whisper.d.ts +19 -0
- package/dst/speechflow-node-whisper.js +604 -0
- package/dst/speechflow-node-x2x-trace.d.ts +11 -0
- package/dst/speechflow-node-x2x-trace.js +88 -0
- package/dst/speechflow-node-xio-device.d.ts +13 -0
- package/dst/speechflow-node-xio-device.js +205 -0
- package/dst/speechflow-node-xio-file.d.ts +11 -0
- package/dst/speechflow-node-xio-file.js +176 -0
- package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
- package/dst/speechflow-node-xio-mqtt.js +181 -0
- package/dst/speechflow-node-xio-websocket.d.ts +13 -0
- package/dst/speechflow-node-xio-websocket.js +275 -0
- package/dst/speechflow-node.d.ts +25 -7
- package/dst/speechflow-node.js +74 -9
- package/dst/speechflow-utils.d.ts +23 -0
- package/dst/speechflow-utils.js +194 -0
- package/dst/speechflow.js +146 -43
- package/etc/biome.jsonc +12 -4
- package/etc/stx.conf +65 -0
- package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
- package/package.json +49 -31
- package/sample.yaml +61 -23
- package/src/lib.d.ts +6 -1
- package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
- package/src/speechflow-node-a2a-wav.ts +143 -0
- package/src/speechflow-node-a2t-deepgram.ts +199 -0
- package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
- package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
- package/src/speechflow-node-t2t-format.ts +85 -0
- package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
- package/src/speechflow-node-t2t-opus.ts +111 -0
- package/src/speechflow-node-t2t-subtitle.ts +101 -0
- package/src/speechflow-node-x2x-trace.ts +92 -0
- package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
- package/src/speechflow-node-xio-file.ts +153 -0
- package/src/speechflow-node-xio-mqtt.ts +154 -0
- package/src/speechflow-node-xio-websocket.ts +248 -0
- package/src/speechflow-node.ts +78 -13
- package/src/speechflow-utils.ts +212 -0
- package/src/speechflow.ts +150 -43
- package/etc/nps.yaml +0 -40
- package/src/speechflow-node-deepgram.ts +0 -133
- package/src/speechflow-node-elevenlabs.ts +0 -116
- package/src/speechflow-node-file.ts +0 -108
- package/src/speechflow-node-websocket.ts +0 -179
package/src/speechflow.ts
CHANGED
```diff
@@ -5,9 +5,12 @@
 */
 
 /* standard dependencies */
+import path from "node:path"
 import Stream from "node:stream"
+import { EventEmitter } from "node:events"
 
 /* external dependencies */
+import { DateTime } from "luxon"
 import CLIio from "cli-io"
 import yargs from "yargs"
 import jsYAML from "js-yaml"
@@ -15,6 +18,7 @@ import FlowLink from "flowlink"
 import objectPath from "object-path"
 import installedPackages from "installed-packages"
 import dotenvx from "@dotenvx/dotenvx"
+import syspath from "syspath"
 
 /* internal dependencies */
 import SpeechFlowNode from "./speechflow-node"
@@ -25,6 +29,12 @@ let cli: CLIio | null = null
 
 /* establish asynchronous environment */
 ;(async () => {
+    /* determine system paths */
+    const { dataDir } = syspath({
+        appName: "speechflow",
+        dataDirAutoCreate: true
+    })
+
     /* parse command-line arguments */
     const args = await yargs()
         /* eslint @stylistic/indent: off */
@@ -33,9 +43,10 @@
             "[-h|--help] " +
             "[-V|--version] " +
             "[-v|--verbose <level>] " +
+            "[-C|--cache <directory>] " +
             "[-e|--expression <expression>] " +
-            "[-f|--
-            "[-c|--config <
+            "[-f|--file <file>] " +
+            "[-c|--config <id>@<yaml-config-file>] " +
             "[<argument> [...]]"
         )
         .help("h").alias("h", "help").default("h", false)
@@ -44,12 +55,14 @@
         .describe("V", "show program version information")
         .string("v").nargs("v", 1).alias("v", "log-level").default("v", "warning")
         .describe("v", "level for verbose logging ('none', 'error', 'warning', 'info', 'debug')")
+        .string("C").nargs("C", 1).alias("C", "cache").default("C", path.join(dataDir, "cache"))
+        .describe("C", "directory for cached files (primarily AI model files)")
         .string("e").nargs("e", 1).alias("e", "expression").default("e", "")
-        .describe("e", "FlowLink expression")
-        .string("f").nargs("f", 1).alias("f", "
+        .describe("e", "FlowLink expression string")
+        .string("f").nargs("f", 1).alias("f", "file").default("f", "")
         .describe("f", "FlowLink expression file")
         .string("c").nargs("c", 1).alias("c", "config-file").default("c", "")
-        .describe("c", "
+        .describe("c", "FlowLink expression reference into YAML file (in format <id>@<file>)")
         .version(false)
         .strict()
         .showHelpOnFail(true)
@@ -128,14 +141,20 @@
 
     /* load internal SpeechFlow nodes */
     const pkgsI = [
-        "./speechflow-node-
-        "./speechflow-node-
-        "./speechflow-node-
-        "./speechflow-node-
-        "./speechflow-node-
-        "./speechflow-node-
-        "./speechflow-node-
-        "./speechflow-node-
+        "./speechflow-node-a2a-ffmpeg.js",
+        "./speechflow-node-a2a-wav.js",
+        "./speechflow-node-a2t-deepgram.js",
+        "./speechflow-node-t2a-elevenlabs.js",
+        "./speechflow-node-t2t-deepl.js",
+        "./speechflow-node-t2t-format.js",
+        "./speechflow-node-t2t-gemma.js",
+        "./speechflow-node-t2t-opus.js",
+        "./speechflow-node-t2t-subtitle.js",
+        "./speechflow-node-x2x-trace.js",
+        "./speechflow-node-xio-device.js",
+        "./speechflow-node-xio-file.js",
+        "./speechflow-node-xio-mqtt.js",
+        "./speechflow-node-xio-websocket.js"
     ]
     for (const pkg of pkgsI) {
         let node: any = await import(pkg)
@@ -175,35 +194,79 @@
     let nodenum = 1
     const variables = { argv: args._, env: process.env }
     const graphNodes = new Set<SpeechFlowNode>()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            cli!.log("
-
-
-
-
-
-
-
+    const cfg = {
+        audioChannels: 1,
+        audioBitDepth: 16,
+        audioLittleEndian: true,
+        audioSampleRate: 48000,
+        textEncoding: "utf8",
+        cacheDir: args.cache
+    }
+    let ast: unknown
+    try {
+        ast = flowlink.compile(config)
+    }
+    catch (err) {
+        if (err instanceof Error && err.name === "FlowLinkError")
+            cli!.log("error", `failed to parse SpeechFlow configuration: ${err.toString()}"`)
+        else if (err instanceof Error)
+            cli!.log("error", `failed to parse SpeechFlow configuration: ${err.message}"`)
+        else
+            cli!.log("error", "failed to parse SpeechFlow configuration: internal error")
+        process.exit(1)
+    }
+    try {
+        flowlink.execute(ast, {
+            resolveVariable (id: string) {
+                if (!objectPath.has(variables, id))
+                    throw new Error(`failed to resolve variable "${id}"`)
+                const value = objectPath.get(variables, id)
+                cli!.log("info", `resolve variable: "${id}" -> "${value}"`)
+                return value
+            },
+            createNode (id: string, opts: { [ id: string ]: any }, args: any[]) {
+                if (nodes[id] === undefined)
+                    throw new Error(`unknown node "${id}"`)
+                let node: SpeechFlowNode
+                try {
+                    node = new nodes[id](`${id}[${nodenum}]`, cfg, opts, args)
+                }
+                catch (err) {
+                    /* fatal error */
+                    if (err instanceof Error)
+                        cli!.log("error", `creation of "${id}[${nodenum}]" node failed: ${err.message}`)
+                    else
+                        cli!.log("error", `creation of "${id}"[${nodenum}] node failed: ${err}`)
+                    process.exit(1)
+                }
+                nodenum++
+                const params = Object.keys(node.params)
+                    .map((key) => `${key}: ${JSON.stringify(node.params[key])}`).join(", ")
+                cli!.log("info", `create node "${node.id}" (${params})`)
+                graphNodes.add(node)
+                return node
+            },
+            connectNode (node1: SpeechFlowNode, node2: SpeechFlowNode) {
+                cli!.log("info", `connect node "${node1.id}" to node "${node2.id}"`)
+                node1.connect(node2)
+            }
+        })
+    }
+    catch (err) {
+        if (err instanceof Error && err.name === "FlowLinkError")
+            cli!.log("error", `failed to materialize SpeechFlow configuration: ${err.toString()}"`)
+        else if (err instanceof Error)
+            cli!.log("error", `failed to materialize SpeechFlow configuration: ${err.message}"`)
+        else
+            cli!.log("error", "failed to materialize SpeechFlow configuration: internal error")
+        process.exit(1)
+    }
 
     /* graph processing: PASS 2: prune connections of nodes */
     for (const node of graphNodes) {
         /* determine connections */
-
-
+        let connectionsIn = Array.from(node.connectionsIn)
+        let connectionsOut = Array.from(node.connectionsOut)
 
         /* ensure necessary incoming links */
         if (node.input !== "none" && connectionsIn.length === 0)
@@ -222,6 +285,8 @@
         connectionsOut.forEach((other) => { node.disconnect(other) })
 
         /* check for payload compatibility */
+        connectionsIn = Array.from(node.connectionsIn)
+        connectionsOut = Array.from(node.connectionsOut)
         for (const other of connectionsOut)
             if (other.input !== node.output)
                 throw new Error(`${node.output} output node "${node.id}" cannot be ` +
@@ -232,7 +297,7 @@
     for (const node of graphNodes) {
         /* connect node events */
        node.on("log", (level: string, msg: string, data?: any) => {
-            let str =
+            let str = `<${node.id}>: ${msg}`
             if (data !== undefined)
                 str += ` (${JSON.stringify(data)})`
             cli!.log(level, str)
@@ -246,7 +311,14 @@
         })
     }
 
-    /* graph processing: PASS 4:
+    /* graph processing: PASS 4: set time zero in all nodes */
+    const timeZero = DateTime.now()
+    for (const node of graphNodes) {
+        cli!.log("info", `set time zero in node "${node.id}"`)
+        node.setTimeZero(timeZero)
+    }
+
+    /* graph processing: PASS 5: connect node streams */
     for (const node of graphNodes) {
         if (node.stream === null)
             throw new Error(`stream of node "${node.id}" still not initialized`)
@@ -264,13 +336,40 @@
         }
     }
 
+    /* graph processing: PASS 6: track stream finishing */
+    const activeNodes = new Set<SpeechFlowNode>()
+    const finishEvents = new EventEmitter()
+    for (const node of graphNodes) {
+        if (node.stream === null)
+            throw new Error(`stream of node "${node.id}" still not initialized`)
+        cli!.log("info", `observe stream of node "${node.id}" for finish event`)
+        activeNodes.add(node)
+        node.stream.on("finish", () => {
+            activeNodes.delete(node)
+            cli!.log("info", `stream of node "${node.id}" finished (${activeNodes.size} nodes remaining active)`)
+            if (activeNodes.size === 0) {
+                const timeFinished = DateTime.now()
+                const duration = timeFinished.diff(timeZero)
+                cli!.log("info", "everything finished -- stream processing in SpeechFlow graph stops " +
+                    `(total duration: ${duration.toFormat("hh:mm:ss.SSS")})`)
+                finishEvents.emit("finished")
+            }
+        })
+    }
+
+    /* start of internal stream processing */
+    cli!.log("info", "everything established -- stream processing in SpeechFlow graph starts")
+
     /* gracefully shutdown process */
     let shuttingDown = false
     const shutdown = async (signal: string) => {
         if (shuttingDown)
             return
         shuttingDown = true
-
+        if (signal === "finished")
+            cli!.log("info", "streams of all nodes finished -- shutting down service")
+        else
+            cli!.log("warning", `received signal ${signal} -- shutting down service`)
 
         /* graph processing: PASS 1: disconnect node streams */
         for (const node of graphNodes) {
@@ -301,7 +400,9 @@
         /* graph processing: PASS 2: close nodes */
         for (const node of graphNodes) {
             cli!.log("info", `close node "${node.id}"`)
-            await node.close()
+            await node.close().catch((err) => {
+                cli!.log("warning", `node "${node.id}" failed to close: ${err}`)
+            })
         }
 
         /* graph processing: PASS 3: disconnect nodes */
@@ -320,8 +421,14 @@
         }
 
         /* terminate process */
-
+        if (signal === "finished")
+            process.exit(0)
+        else
+            process.exit(1)
     }
+    finishEvents.on("finished", () => {
+        shutdown("finished")
+    })
     process.on("SIGINT", () => {
        shutdown("SIGINT")
     })
```
package/etc/nps.yaml
DELETED
```diff
@@ -1,40 +0,0 @@
-##
-##  SpeechFlow - Speech Processing Flow Graph
-##  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
-##  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
-##
-
-scripts:
-    # multiview-style development dashboard
-    dev: >
-        stmux -w always -m beep -e "built.in.+ms" --
-        [ -s 35% "npm start lint-watch" :
-          -s 15% "npm start build-watch" :
-          -s 30% "npm start server-delay server-watch" ]
-
-    # static code analysis (linting)
-    lint-watch: nodemon --exec "npm start lint" --watch src --ext ts
-    lint: npm start lint-tsc lint-oxlint lint-biome lint-eslint
-    lint-tsc: tsc --project etc/tsconfig.json --noEmit
-    lint-oxlint: oxlint --config etc/oxlint.jsonc src/**/*.ts
-    lint-biome: biome lint --diagnostic-level=warn --config-path=etc/biome.jsonc src/*.ts
-    lint-eslint: eslint --config etc/eslint.mjs src/**/*.ts
-
-    # code compilation/transpiling (building)
-    build: >
-        tsc --project etc/tsconfig.json &&
-        (echo "#!/usr/bin/env node"; cat dst/speechflow.js) >dst/speechflow.js.new &&
-        mv dst/speechflow.js.new dst/speechflow.js
-    build-watch: nodemon --exec "npm start build" --watch src --ext ts
-
-    # start server run-time
-    server-delay: delay 2.0
-    server: node dst/speechflow.js -v info -c sample@sample.yaml
-    server-watch: >
-        cross-env NODE_OPTIONS="--enable-source-maps"
-        nodemon --exec "npm start server" --watch dst --ext ts --delay 1.0
-
-    # cleanup filesystem
-    clean: rimraf dst
-    clean-dist: rimraf dst node_modules
-
```
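These nps scripts were presumably migrated to the new etc/stx.conf added above (+65 lines). One step worth noting is the build script's shebang prepending, which makes the compiled dst/speechflow.js directly executable; the same transformation as a small Node sketch (equivalent to the echo/cat/mv pipeline above, file names as in the script):

```typescript
import fs from "node:fs"

/*  prepend a shebang to the compiled CLI entry point, writing to a
    temporary file first and then renaming it over the original  */
const js = fs.readFileSync("dst/speechflow.js", "utf8")
fs.writeFileSync("dst/speechflow.js.new", "#!/usr/bin/env node\n" + js)
fs.renameSync("dst/speechflow.js.new", "dst/speechflow.js")
```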
package/src/speechflow-node-deepgram.ts
DELETED
```diff
@@ -1,133 +0,0 @@
-/*
-**  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
-**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
-*/
-
-/* standard dependencies */
-import { EventEmitter } from "node:events"
-
-/* external dependencies */
-import Stream from "node:stream"
-import * as Deepgram from "@deepgram/sdk"
-
-/* internal dependencies */
-import SpeechFlowNode from "./speechflow-node"
-
-/* SpeechFlow node for Deepgram speech-to-text conversion */
-export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
-    /* declare official node name */
-    public static name = "deepgram"
-
-    /* internal state */
-    private dg: Deepgram.LiveClient | null = null
-
-    /* construct node */
-    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
-        super(id, opts, args)
-
-        /* declare node configuration parameters */
-        this.configure({
-            key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
-            model: { type: "string", val: "nova-3", pos: 0 },
-            version: { type: "string", val: "latest", pos: 1 },
-            language: { type: "string", val: "multi", pos: 2 }
-        })
-
-        /* declare node input/output format */
-        this.input = "audio"
-        this.output = "text"
-    }
-
-    /* open node */
-    async open () {
-        /* sanity check situation */
-        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
-            throw new Error("Deepgram node currently supports PCM-S16LE audio only")
-
-        /* create queue for results */
-        const queue = new EventEmitter()
-
-        /* connect to Deepgram API */
-        const deepgram = Deepgram.createClient(this.params.key)
-        this.dg = deepgram.listen.live({
-            model: this.params.model,
-            version: this.params.version,
-            language: this.params.language,
-            channels: this.config.audioChannels,
-            sample_rate: this.config.audioSampleRate,
-            encoding: "linear16",
-            multichannel: false,
-            endpointing: 10,
-            interim_results: false,
-            smart_format: true,
-            punctuate: true,
-            filler_words: true,
-            diarize: true,
-            numerals: true,
-            paragraphs: true,
-            profanity_filter: true,
-            utterances: false
-        })
-
-        /* hook onto Deepgram API events */
-        this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
-            const text = data.channel?.alternatives[0].transcript ?? ""
-            if (text === "")
-                return
-            queue.emit("text", text)
-        })
-        this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
-            this.log("info", "Deepgram: metadata received")
-        })
-        this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
-            this.log("info", "Deepgram: connection close")
-        })
-        this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
-            this.log("error", `Deepgram: ${error.message}`)
-            this.emit("error")
-        })
-
-        /* wait for Deepgram API to be available */
-        await new Promise((resolve) => {
-            this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
-                this.log("info", "Deepgram: connection open")
-                resolve(true)
-            })
-        })
-
-        /* provide Duplex stream and internally attach to Deepgram API */
-        const dg = this.dg
-        this.stream = new Stream.Duplex({
-            write (chunk: Buffer, encoding, callback) {
-                const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)
-                if (data.byteLength === 0)
-                    queue.emit("text", "")
-                else
-                    dg.send(data)
-                callback()
-            },
-            read (size) {
-                queue.once("text", (text: string) => {
-                    this.push(text)
-                })
-            },
-            final (callback) {
-                dg.requestClose()
-            }
-        })
-    }
-
-    /* close node */
-    async close () {
-        /* close stream */
-        if (this.stream !== null) {
-            this.stream.destroy()
-            this.stream = null
-        }
-
-        /* shutdown Deepgram API */
-        if (this.dg !== null)
-            this.dg.requestClose()
-    }
-}
```
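This deleted node (its role taken over by the new speechflow-node-a2t-deepgram.ts per the file list) bridges the event-driven Deepgram client into a Node Duplex through an EventEmitter used as a hand-off queue: write() forwards audio to the client, read() arms a one-shot listener for the next transcript. A minimal self-contained sketch of that bridge, with a trivial upper-casing "recognizer" standing in for the Deepgram live client (all names here are illustrative):

```typescript
import Stream from "node:stream"
import { EventEmitter } from "node:events"

/*  stand-in for the asynchronous recognizer (e.g. the Deepgram live
    client): echoes each audio chunk back as upper-cased "transcript"  */
const recognizer = new EventEmitter()
recognizer.on("audio", (chunk: Buffer) => {
    setImmediate(() => recognizer.emit("transcript", chunk.toString().toUpperCase()))
})

/*  EventEmitter used as hand-off queue between write and read side  */
const queue = new EventEmitter()
recognizer.on("transcript", (text: string) => { queue.emit("text", text) })

const bridge = new Stream.Duplex({
    write (chunk: Buffer, encoding: BufferEncoding, callback) {
        recognizer.emit("audio", chunk)  /* feed the audio side */
        callback()
    },
    read (size: number) {
        queue.once("text", (text: string) => {  /* deliver next result */
            this.push(text)
        })
    }
})

bridge.on("data", (text) => { console.log(text.toString()) })  /* "HELLO" */
bridge.write("hello")
```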
package/src/speechflow-node-elevenlabs.ts
DELETED
```diff
@@ -1,116 +0,0 @@
-/*
-**  SpeechFlow - Speech Processing Flow Graph
-**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
-**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
-*/
-
-/* standard dependencies */
-import Stream from "node:stream"
-import { EventEmitter } from "node:events"
-
-/* external dependencies */
-import * as ElevenLabs from "elevenlabs"
-import { getStreamAsBuffer } from "get-stream"
-
-/* internal dependencies */
-import SpeechFlowNode from "./speechflow-node"
-
-/*
-const elevenlabsVoices = {
-    "drew": { name: "Drew", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
-    "george": { name: "George", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
-    "bill": { name: "Bill", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
-    "daniel": { name: "Daniel", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
-    "brian": { name: "Brian", model: "eleven_turbo_v2", lang: [ "en" ] },
-    "sarah": { name: "Sarah", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
-    "racel": { name: "Racel", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
-    "grace": { name: "Grace", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
-    "matilda": { name: "Matilda", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
-    "alice": { name: "Alice", model: "eleven_turbo_v2", lang: [ "en" ] }
-}
-*/
-
-export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
-    /* declare official node name */
-    public static name = "elevenlabs"
-
-    /* internal state */
-    private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
-
-    /* construct node */
-    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
-        super(id, opts, args)
-
-        /* declare node configuration parameters */
-        this.configure({
-            key: { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
-            voice: { type: "string", val: "Brian", pos: 0 },
-            language: { type: "string", val: "de", pos: 1 }
-        })
-
-        /* declare node input/output format */
-        this.input = "text"
-        this.output = "audio"
-    }
-
-    /* open node */
-    async open () {
-        this.elevenlabs = new ElevenLabs.ElevenLabsClient({
-            apiKey: this.params.key
-        })
-        const voices = await this.elevenlabs.voices.getAll()
-        const voice = voices.voices.find((voice) => voice.name === this.params.voice)
-        if (voice === undefined)
-            throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
-        const speechStream = (text: string) => {
-            return this.elevenlabs!.textToSpeech.convert(voice.voice_id, {
-                text,
-                optimize_streaming_latency: 2,
-                output_format: "pcm_16000", // S16LE
-                model_id: "eleven_flash_v2_5",
-                /*
-                voice_settings: {
-                    stability: 0,
-                    similarity_boost: 0
-                }
-                */
-            }, {
-                timeoutInSeconds: 30,
-                maxRetries: 10
-            })
-        }
-        const queue = new EventEmitter()
-        this.stream = new Stream.Duplex({
-            write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
-                if (encoding !== "utf8" && encoding !== "utf-8")
-                    callback(new Error("only text input supported by Elevenlabs node"))
-                const data = chunk.toString()
-                speechStream(data).then((stream) => {
-                    getStreamAsBuffer(stream).then((buffer) => {
-                        queue.emit("audio", buffer)
-                        callback()
-                    }).catch((error) => {
-                        callback(error)
-                    })
-                }).catch((error) => {
-                    callback(error)
-                })
-            },
-            read (size: number) {
-                queue.once("audio", (buffer: Buffer) => {
-                    this.push(buffer, "binary")
-                })
-            }
-        })
-    }
-
-    /* close node */
-    async close () {
-        /* destroy stream */
-        if (this.stream !== null) {
-            this.stream.destroy()
-            this.stream = null
-        }
-    }
-}
```
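This node (superseded by the new speechflow-node-t2a-elevenlabs.ts per the file list) is the mirror image of the Deepgram bridge on the write side: each text chunk triggers an asynchronous synthesis call whose fully buffered result is handed to the read side through the same kind of EventEmitter queue. A minimal sketch with a stubbed synthesizer in place of the ElevenLabs client (the stub and names are illustrative):

```typescript
import Stream from "node:stream"
import { EventEmitter } from "node:events"

/*  stub standing in for the buffered ElevenLabs synthesis call
    (speechStream + getStreamAsBuffer in the original)  */
const synthesize = async (text: string): Promise<Buffer> =>
    Buffer.from(`<pcm:${text}>`)

/*  EventEmitter used as hand-off queue between write and read side  */
const queue = new EventEmitter()
const tts = new Stream.Duplex({
    write (chunk: Buffer, encoding: BufferEncoding, callback) {
        synthesize(chunk.toString())
            .then((buffer) => { queue.emit("audio", buffer); callback() })
            .catch((error) => { callback(error) })
    },
    read (size: number) {
        queue.once("audio", (buffer: Buffer) => { this.push(buffer) })
    }
})

tts.on("data", (buffer) => { console.log(buffer.toString()) })  /* "<pcm:guten tag>" */
tts.write("guten tag")
```

Note the design consequence of the original's getStreamAsBuffer step: each text chunk is synthesized completely before any of its audio is pushed downstream, trading latency for simplicity.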