speechflow 0.9.4 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +227 -54
  3. package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
  4. package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
  5. package/dst/speechflow-node-a2a-wav.d.ts +11 -0
  6. package/dst/speechflow-node-a2a-wav.js +170 -0
  7. package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
  8. package/dst/speechflow-node-a2t-deepgram.js +220 -0
  9. package/dst/speechflow-node-deepgram.d.ts +3 -1
  10. package/dst/speechflow-node-deepgram.js +86 -22
  11. package/dst/speechflow-node-deepl.d.ts +3 -1
  12. package/dst/speechflow-node-deepl.js +25 -20
  13. package/dst/speechflow-node-device.d.ts +3 -1
  14. package/dst/speechflow-node-device.js +53 -2
  15. package/dst/speechflow-node-elevenlabs.d.ts +4 -1
  16. package/dst/speechflow-node-elevenlabs.js +88 -49
  17. package/dst/speechflow-node-ffmpeg.d.ts +3 -1
  18. package/dst/speechflow-node-ffmpeg.js +42 -4
  19. package/dst/speechflow-node-file.d.ts +3 -1
  20. package/dst/speechflow-node-file.js +84 -13
  21. package/dst/speechflow-node-format.d.ts +11 -0
  22. package/dst/speechflow-node-format.js +80 -0
  23. package/dst/speechflow-node-gemma.d.ts +3 -1
  24. package/dst/speechflow-node-gemma.js +84 -23
  25. package/dst/speechflow-node-mqtt.d.ts +13 -0
  26. package/dst/speechflow-node-mqtt.js +181 -0
  27. package/dst/speechflow-node-opus.d.ts +12 -0
  28. package/dst/speechflow-node-opus.js +135 -0
  29. package/dst/speechflow-node-subtitle.d.ts +12 -0
  30. package/dst/speechflow-node-subtitle.js +96 -0
  31. package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
  32. package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
  33. package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
  34. package/dst/speechflow-node-t2t-deepl.js +133 -0
  35. package/dst/speechflow-node-t2t-format.d.ts +11 -0
  36. package/dst/speechflow-node-t2t-format.js +80 -0
  37. package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
  38. package/dst/speechflow-node-t2t-gemma.js +213 -0
  39. package/dst/speechflow-node-t2t-opus.d.ts +12 -0
  40. package/dst/speechflow-node-t2t-opus.js +135 -0
  41. package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
  42. package/dst/speechflow-node-t2t-subtitle.js +96 -0
  43. package/dst/speechflow-node-trace.d.ts +11 -0
  44. package/dst/speechflow-node-trace.js +88 -0
  45. package/dst/speechflow-node-wav.d.ts +11 -0
  46. package/dst/speechflow-node-wav.js +170 -0
  47. package/dst/speechflow-node-websocket.d.ts +3 -1
  48. package/dst/speechflow-node-websocket.js +149 -49
  49. package/dst/speechflow-node-whisper-common.d.ts +34 -0
  50. package/dst/speechflow-node-whisper-common.js +7 -0
  51. package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
  52. package/dst/speechflow-node-whisper-ggml.js +97 -0
  53. package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
  54. package/dst/speechflow-node-whisper-onnx.js +131 -0
  55. package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
  56. package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
  57. package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
  58. package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
  59. package/dst/speechflow-node-whisper-worker.d.ts +1 -0
  60. package/dst/speechflow-node-whisper-worker.js +116 -0
  61. package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
  62. package/dst/speechflow-node-whisper-worker2.js +82 -0
  63. package/dst/speechflow-node-whisper.d.ts +19 -0
  64. package/dst/speechflow-node-whisper.js +604 -0
  65. package/dst/speechflow-node-x2x-trace.d.ts +11 -0
  66. package/dst/speechflow-node-x2x-trace.js +88 -0
  67. package/dst/speechflow-node-xio-device.d.ts +13 -0
  68. package/dst/speechflow-node-xio-device.js +205 -0
  69. package/dst/speechflow-node-xio-file.d.ts +11 -0
  70. package/dst/speechflow-node-xio-file.js +176 -0
  71. package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
  72. package/dst/speechflow-node-xio-mqtt.js +181 -0
  73. package/dst/speechflow-node-xio-websocket.d.ts +13 -0
  74. package/dst/speechflow-node-xio-websocket.js +275 -0
  75. package/dst/speechflow-node.d.ts +25 -7
  76. package/dst/speechflow-node.js +74 -9
  77. package/dst/speechflow-utils.d.ts +23 -0
  78. package/dst/speechflow-utils.js +194 -0
  79. package/dst/speechflow.js +146 -43
  80. package/etc/biome.jsonc +12 -4
  81. package/etc/stx.conf +65 -0
  82. package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
  83. package/package.json +49 -31
  84. package/sample.yaml +61 -23
  85. package/src/lib.d.ts +6 -1
  86. package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
  87. package/src/speechflow-node-a2a-wav.ts +143 -0
  88. package/src/speechflow-node-a2t-deepgram.ts +199 -0
  89. package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
  90. package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
  91. package/src/speechflow-node-t2t-format.ts +85 -0
  92. package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
  93. package/src/speechflow-node-t2t-opus.ts +111 -0
  94. package/src/speechflow-node-t2t-subtitle.ts +101 -0
  95. package/src/speechflow-node-x2x-trace.ts +92 -0
  96. package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
  97. package/src/speechflow-node-xio-file.ts +153 -0
  98. package/src/speechflow-node-xio-mqtt.ts +154 -0
  99. package/src/speechflow-node-xio-websocket.ts +248 -0
  100. package/src/speechflow-node.ts +78 -13
  101. package/src/speechflow-utils.ts +212 -0
  102. package/src/speechflow.ts +150 -43
  103. package/etc/nps.yaml +0 -40
  104. package/src/speechflow-node-deepgram.ts +0 -133
  105. package/src/speechflow-node-elevenlabs.ts +0 -116
  106. package/src/speechflow-node-file.ts +0 -108
  107. package/src/speechflow-node-websocket.ts +0 -179
package/src/speechflow.ts CHANGED
@@ -5,9 +5,12 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
+ import path from "node:path"
8
9
  import Stream from "node:stream"
10
+ import { EventEmitter } from "node:events"
9
11
 
10
12
  /* external dependencies */
13
+ import { DateTime } from "luxon"
11
14
  import CLIio from "cli-io"
12
15
  import yargs from "yargs"
13
16
  import jsYAML from "js-yaml"
@@ -15,6 +18,7 @@ import FlowLink from "flowlink"
15
18
  import objectPath from "object-path"
16
19
  import installedPackages from "installed-packages"
17
20
  import dotenvx from "@dotenvx/dotenvx"
21
+ import syspath from "syspath"
18
22
 
19
23
  /* internal dependencies */
20
24
  import SpeechFlowNode from "./speechflow-node"
@@ -25,6 +29,12 @@ let cli: CLIio | null = null
25
29
 
26
30
  /* establish asynchronous environment */
27
31
  ;(async () => {
32
+ /* determine system paths */
33
+ const { dataDir } = syspath({
34
+ appName: "speechflow",
35
+ dataDirAutoCreate: true
36
+ })
37
+
28
38
  /* parse command-line arguments */
29
39
  const args = await yargs()
30
40
  /* eslint @stylistic/indent: off */
@@ -33,9 +43,10 @@ let cli: CLIio | null = null
33
43
  "[-h|--help] " +
34
44
  "[-V|--version] " +
35
45
  "[-v|--verbose <level>] " +
46
+ "[-C|--cache <directory>] " +
36
47
  "[-e|--expression <expression>] " +
37
- "[-f|--expression-file <expression-file>] " +
38
- "[-c|--config <key>@<yaml-config-file>] " +
48
+ "[-f|--file <file>] " +
49
+ "[-c|--config <id>@<yaml-config-file>] " +
39
50
  "[<argument> [...]]"
40
51
  )
41
52
  .help("h").alias("h", "help").default("h", false)
@@ -44,12 +55,14 @@ let cli: CLIio | null = null
44
55
  .describe("V", "show program version information")
45
56
  .string("v").nargs("v", 1).alias("v", "log-level").default("v", "warning")
46
57
  .describe("v", "level for verbose logging ('none', 'error', 'warning', 'info', 'debug')")
58
+ .string("C").nargs("C", 1).alias("C", "cache").default("C", path.join(dataDir, "cache"))
59
+ .describe("C", "directory for cached files (primarily AI model files)")
47
60
  .string("e").nargs("e", 1).alias("e", "expression").default("e", "")
48
- .describe("e", "FlowLink expression")
49
- .string("f").nargs("f", 1).alias("f", "expression-file").default("f", "")
61
+ .describe("e", "FlowLink expression string")
62
+ .string("f").nargs("f", 1).alias("f", "file").default("f", "")
50
63
  .describe("f", "FlowLink expression file")
51
64
  .string("c").nargs("c", 1).alias("c", "config-file").default("c", "")
52
- .describe("c", "configuration in format <id>@<file>")
65
+ .describe("c", "FlowLink expression reference into YAML file (in format <id>@<file>)")
53
66
  .version(false)
54
67
  .strict()
55
68
  .showHelpOnFail(true)
@@ -128,14 +141,20 @@ let cli: CLIio | null = null
128
141
 
129
142
  /* load internal SpeechFlow nodes */
130
143
  const pkgsI = [
131
- "./speechflow-node-file.js",
132
- "./speechflow-node-device.js",
133
- "./speechflow-node-websocket.js",
134
- "./speechflow-node-ffmpeg.js",
135
- "./speechflow-node-deepgram.js",
136
- "./speechflow-node-deepl.js",
137
- "./speechflow-node-elevenlabs.js",
138
- "./speechflow-node-gemma.js",
144
+ "./speechflow-node-a2a-ffmpeg.js",
145
+ "./speechflow-node-a2a-wav.js",
146
+ "./speechflow-node-a2t-deepgram.js",
147
+ "./speechflow-node-t2a-elevenlabs.js",
148
+ "./speechflow-node-t2t-deepl.js",
149
+ "./speechflow-node-t2t-format.js",
150
+ "./speechflow-node-t2t-gemma.js",
151
+ "./speechflow-node-t2t-opus.js",
152
+ "./speechflow-node-t2t-subtitle.js",
153
+ "./speechflow-node-x2x-trace.js",
154
+ "./speechflow-node-xio-device.js",
155
+ "./speechflow-node-xio-file.js",
156
+ "./speechflow-node-xio-mqtt.js",
157
+ "./speechflow-node-xio-websocket.js"
139
158
  ]
140
159
  for (const pkg of pkgsI) {
141
160
  let node: any = await import(pkg)
@@ -175,35 +194,79 @@ let cli: CLIio | null = null
175
194
  let nodenum = 1
176
195
  const variables = { argv: args._, env: process.env }
177
196
  const graphNodes = new Set<SpeechFlowNode>()
178
- flowlink.evaluate(config, {
179
- resolveVariable (id: string) {
180
- if (!objectPath.has(variables, id))
181
- throw new Error(`failed to resolve variable "${id}"`)
182
- const value = objectPath.get(variables, id)
183
- cli!.log("info", `resolve variable: "${id}" -> "${value}"`)
184
- return value
185
- },
186
- createNode (id: string, opts: { [ id: string ]: any }, args: any[]) {
187
- if (nodes[id] === undefined)
188
- throw new Error(`unknown node "${id}"`)
189
- const node = new nodes[id](`${id}[${nodenum++}]`, opts, args)
190
- const params = Object.keys(node.params)
191
- .map((key) => `${key}: ${JSON.stringify(node.params[key])}`).join(", ")
192
- cli!.log("info", `create node "${node.id}" (${params})`)
193
- graphNodes.add(node)
194
- return node
195
- },
196
- connectNode (node1: SpeechFlowNode, node2: SpeechFlowNode) {
197
- cli!.log("info", `connect node "${node1.id}" to node "${node2.id}"`)
198
- node1.connect(node2)
199
- }
200
- })
197
+ const cfg = {
198
+ audioChannels: 1,
199
+ audioBitDepth: 16,
200
+ audioLittleEndian: true,
201
+ audioSampleRate: 48000,
202
+ textEncoding: "utf8",
203
+ cacheDir: args.cache
204
+ }
205
+ let ast: unknown
206
+ try {
207
+ ast = flowlink.compile(config)
208
+ }
209
+ catch (err) {
210
+ if (err instanceof Error && err.name === "FlowLinkError")
211
+ cli!.log("error", `failed to parse SpeechFlow configuration: ${err.toString()}"`)
212
+ else if (err instanceof Error)
213
+ cli!.log("error", `failed to parse SpeechFlow configuration: ${err.message}"`)
214
+ else
215
+ cli!.log("error", "failed to parse SpeechFlow configuration: internal error")
216
+ process.exit(1)
217
+ }
218
+ try {
219
+ flowlink.execute(ast, {
220
+ resolveVariable (id: string) {
221
+ if (!objectPath.has(variables, id))
222
+ throw new Error(`failed to resolve variable "${id}"`)
223
+ const value = objectPath.get(variables, id)
224
+ cli!.log("info", `resolve variable: "${id}" -> "${value}"`)
225
+ return value
226
+ },
227
+ createNode (id: string, opts: { [ id: string ]: any }, args: any[]) {
228
+ if (nodes[id] === undefined)
229
+ throw new Error(`unknown node "${id}"`)
230
+ let node: SpeechFlowNode
231
+ try {
232
+ node = new nodes[id](`${id}[${nodenum}]`, cfg, opts, args)
233
+ }
234
+ catch (err) {
235
+ /* fatal error */
236
+ if (err instanceof Error)
237
+ cli!.log("error", `creation of "${id}[${nodenum}]" node failed: ${err.message}`)
238
+ else
239
+ cli!.log("error", `creation of "${id}"[${nodenum}] node failed: ${err}`)
240
+ process.exit(1)
241
+ }
242
+ nodenum++
243
+ const params = Object.keys(node.params)
244
+ .map((key) => `${key}: ${JSON.stringify(node.params[key])}`).join(", ")
245
+ cli!.log("info", `create node "${node.id}" (${params})`)
246
+ graphNodes.add(node)
247
+ return node
248
+ },
249
+ connectNode (node1: SpeechFlowNode, node2: SpeechFlowNode) {
250
+ cli!.log("info", `connect node "${node1.id}" to node "${node2.id}"`)
251
+ node1.connect(node2)
252
+ }
253
+ })
254
+ }
255
+ catch (err) {
256
+ if (err instanceof Error && err.name === "FlowLinkError")
257
+ cli!.log("error", `failed to materialize SpeechFlow configuration: ${err.toString()}"`)
258
+ else if (err instanceof Error)
259
+ cli!.log("error", `failed to materialize SpeechFlow configuration: ${err.message}"`)
260
+ else
261
+ cli!.log("error", "failed to materialize SpeechFlow configuration: internal error")
262
+ process.exit(1)
263
+ }
201
264
 
202
265
  /* graph processing: PASS 2: prune connections of nodes */
203
266
  for (const node of graphNodes) {
204
267
  /* determine connections */
205
- const connectionsIn = Array.from(node.connectionsIn)
206
- const connectionsOut = Array.from(node.connectionsOut)
268
+ let connectionsIn = Array.from(node.connectionsIn)
269
+ let connectionsOut = Array.from(node.connectionsOut)
207
270
 
208
271
  /* ensure necessary incoming links */
209
272
  if (node.input !== "none" && connectionsIn.length === 0)
@@ -222,6 +285,8 @@ let cli: CLIio | null = null
222
285
  connectionsOut.forEach((other) => { node.disconnect(other) })
223
286
 
224
287
  /* check for payload compatibility */
288
+ connectionsIn = Array.from(node.connectionsIn)
289
+ connectionsOut = Array.from(node.connectionsOut)
225
290
  for (const other of connectionsOut)
226
291
  if (other.input !== node.output)
227
292
  throw new Error(`${node.output} output node "${node.id}" cannot be ` +
@@ -232,7 +297,7 @@ let cli: CLIio | null = null
232
297
  for (const node of graphNodes) {
233
298
  /* connect node events */
234
299
  node.on("log", (level: string, msg: string, data?: any) => {
235
- let str = `[${node.id}]: ${msg}`
300
+ let str = `<${node.id}>: ${msg}`
236
301
  if (data !== undefined)
237
302
  str += ` (${JSON.stringify(data)})`
238
303
  cli!.log(level, str)
@@ -246,7 +311,14 @@ let cli: CLIio | null = null
246
311
  })
247
312
  }
248
313
 
249
- /* graph processing: PASS 4: connect node streams */
314
+ /* graph processing: PASS 4: set time zero in all nodes */
315
+ const timeZero = DateTime.now()
316
+ for (const node of graphNodes) {
317
+ cli!.log("info", `set time zero in node "${node.id}"`)
318
+ node.setTimeZero(timeZero)
319
+ }
320
+
321
+ /* graph processing: PASS 5: connect node streams */
250
322
  for (const node of graphNodes) {
251
323
  if (node.stream === null)
252
324
  throw new Error(`stream of node "${node.id}" still not initialized`)
@@ -264,13 +336,40 @@ let cli: CLIio | null = null
264
336
  }
265
337
  }
266
338
 
339
+ /* graph processing: PASS 6: track stream finishing */
340
+ const activeNodes = new Set<SpeechFlowNode>()
341
+ const finishEvents = new EventEmitter()
342
+ for (const node of graphNodes) {
343
+ if (node.stream === null)
344
+ throw new Error(`stream of node "${node.id}" still not initialized`)
345
+ cli!.log("info", `observe stream of node "${node.id}" for finish event`)
346
+ activeNodes.add(node)
347
+ node.stream.on("finish", () => {
348
+ activeNodes.delete(node)
349
+ cli!.log("info", `stream of node "${node.id}" finished (${activeNodes.size} nodes remaining active)`)
350
+ if (activeNodes.size === 0) {
351
+ const timeFinished = DateTime.now()
352
+ const duration = timeFinished.diff(timeZero)
353
+ cli!.log("info", "everything finished -- stream processing in SpeechFlow graph stops " +
354
+ `(total duration: ${duration.toFormat("hh:mm:ss.SSS")})`)
355
+ finishEvents.emit("finished")
356
+ }
357
+ })
358
+ }
359
+
360
+ /* start of internal stream processing */
361
+ cli!.log("info", "everything established -- stream processing in SpeechFlow graph starts")
362
+
267
363
  /* gracefully shutdown process */
268
364
  let shuttingDown = false
269
365
  const shutdown = async (signal: string) => {
270
366
  if (shuttingDown)
271
367
  return
272
368
  shuttingDown = true
273
- cli!.log("warning", `received signal ${signal} -- shutting down service`)
369
+ if (signal === "finished")
370
+ cli!.log("info", "streams of all nodes finished -- shutting down service")
371
+ else
372
+ cli!.log("warning", `received signal ${signal} -- shutting down service`)
274
373
 
275
374
  /* graph processing: PASS 1: disconnect node streams */
276
375
  for (const node of graphNodes) {
@@ -301,7 +400,9 @@ let cli: CLIio | null = null
301
400
  /* graph processing: PASS 2: close nodes */
302
401
  for (const node of graphNodes) {
303
402
  cli!.log("info", `close node "${node.id}"`)
304
- await node.close()
403
+ await node.close().catch((err) => {
404
+ cli!.log("warning", `node "${node.id}" failed to close: ${err}`)
405
+ })
305
406
  }
306
407
 
307
408
  /* graph processing: PASS 3: disconnect nodes */
@@ -320,8 +421,14 @@ let cli: CLIio | null = null
320
421
  }
321
422
 
322
423
  /* terminate process */
323
- process.exit(1)
424
+ if (signal === "finished")
425
+ process.exit(0)
426
+ else
427
+ process.exit(1)
324
428
  }
429
+ finishEvents.on("finished", () => {
430
+ shutdown("finished")
431
+ })
325
432
  process.on("SIGINT", () => {
326
433
  shutdown("SIGINT")
327
434
  })
package/etc/nps.yaml DELETED
@@ -1,40 +0,0 @@
1
- ##
2
- ## SpeechFlow - Speech Processing Flow Graph
3
- ## Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
- ## Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
- ##
6
-
7
- scripts:
8
- # multiview-style development dashboard
9
- dev: >
10
- stmux -w always -m beep -e "built.in.+ms" --
11
- [ -s 35% "npm start lint-watch" :
12
- -s 15% "npm start build-watch" :
13
- -s 30% "npm start server-delay server-watch" ]
14
-
15
- # static code analysis (linting)
16
- lint-watch: nodemon --exec "npm start lint" --watch src --ext ts
17
- lint: npm start lint-tsc lint-oxlint lint-biome lint-eslint
18
- lint-tsc: tsc --project etc/tsconfig.json --noEmit
19
- lint-oxlint: oxlint --config etc/oxlint.jsonc src/**/*.ts
20
- lint-biome: biome lint --diagnostic-level=warn --config-path=etc/biome.jsonc src/*.ts
21
- lint-eslint: eslint --config etc/eslint.mjs src/**/*.ts
22
-
23
- # code compilation/transpiling (building)
24
- build: >
25
- tsc --project etc/tsconfig.json &&
26
- (echo "#!/usr/bin/env node"; cat dst/speechflow.js) >dst/speechflow.js.new &&
27
- mv dst/speechflow.js.new dst/speechflow.js
28
- build-watch: nodemon --exec "npm start build" --watch src --ext ts
29
-
30
- # start server run-time
31
- server-delay: delay 2.0
32
- server: node dst/speechflow.js -v info -c sample@sample.yaml
33
- server-watch: >
34
- cross-env NODE_OPTIONS="--enable-source-maps"
35
- nodemon --exec "npm start server" --watch dst --ext ts --delay 1.0
36
-
37
- # cleanup filesystem
38
- clean: rimraf dst
39
- clean-dist: rimraf dst node_modules
40
-
package/src/speechflow-node-deepgram.ts DELETED
@@ -1,133 +0,0 @@
1
- /*
2
- ** SpeechFlow - Speech Processing Flow Graph
3
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
- */
6
-
7
- /* standard dependencies */
8
- import { EventEmitter } from "node:events"
9
-
10
- /* external dependencies */
11
- import Stream from "node:stream"
12
- import * as Deepgram from "@deepgram/sdk"
13
-
14
- /* internal dependencies */
15
- import SpeechFlowNode from "./speechflow-node"
16
-
17
- /* SpeechFlow node for Deepgram speech-to-text conversion */
18
- export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
19
- /* declare official node name */
20
- public static name = "deepgram"
21
-
22
- /* internal state */
23
- private dg: Deepgram.LiveClient | null = null
24
-
25
- /* construct node */
26
- constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
27
- super(id, opts, args)
28
-
29
- /* declare node configuration parameters */
30
- this.configure({
31
- key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
32
- model: { type: "string", val: "nova-3", pos: 0 },
33
- version: { type: "string", val: "latest", pos: 1 },
34
- language: { type: "string", val: "multi", pos: 2 }
35
- })
36
-
37
- /* declare node input/output format */
38
- this.input = "audio"
39
- this.output = "text"
40
- }
41
-
42
- /* open node */
43
- async open () {
44
- /* sanity check situation */
45
- if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
46
- throw new Error("Deepgram node currently supports PCM-S16LE audio only")
47
-
48
- /* create queue for results */
49
- const queue = new EventEmitter()
50
-
51
- /* connect to Deepgram API */
52
- const deepgram = Deepgram.createClient(this.params.key)
53
- this.dg = deepgram.listen.live({
54
- model: this.params.model,
55
- version: this.params.version,
56
- language: this.params.language,
57
- channels: this.config.audioChannels,
58
- sample_rate: this.config.audioSampleRate,
59
- encoding: "linear16",
60
- multichannel: false,
61
- endpointing: 10,
62
- interim_results: false,
63
- smart_format: true,
64
- punctuate: true,
65
- filler_words: true,
66
- diarize: true,
67
- numerals: true,
68
- paragraphs: true,
69
- profanity_filter: true,
70
- utterances: false
71
- })
72
-
73
- /* hook onto Deepgram API events */
74
- this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
75
- const text = data.channel?.alternatives[0].transcript ?? ""
76
- if (text === "")
77
- return
78
- queue.emit("text", text)
79
- })
80
- this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
81
- this.log("info", "Deepgram: metadata received")
82
- })
83
- this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
84
- this.log("info", "Deepgram: connection close")
85
- })
86
- this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
87
- this.log("error", `Deepgram: ${error.message}`)
88
- this.emit("error")
89
- })
90
-
91
- /* wait for Deepgram API to be available */
92
- await new Promise((resolve) => {
93
- this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
94
- this.log("info", "Deepgram: connection open")
95
- resolve(true)
96
- })
97
- })
98
-
99
- /* provide Duplex stream and internally attach to Deepgram API */
100
- const dg = this.dg
101
- this.stream = new Stream.Duplex({
102
- write (chunk: Buffer, encoding, callback) {
103
- const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)
104
- if (data.byteLength === 0)
105
- queue.emit("text", "")
106
- else
107
- dg.send(data)
108
- callback()
109
- },
110
- read (size) {
111
- queue.once("text", (text: string) => {
112
- this.push(text)
113
- })
114
- },
115
- final (callback) {
116
- dg.requestClose()
117
- }
118
- })
119
- }
120
-
121
- /* close node */
122
- async close () {
123
- /* close stream */
124
- if (this.stream !== null) {
125
- this.stream.destroy()
126
- this.stream = null
127
- }
128
-
129
- /* shutdown Deepgram API */
130
- if (this.dg !== null)
131
- this.dg.requestClose()
132
- }
133
- }
package/src/speechflow-node-elevenlabs.ts DELETED
@@ -1,116 +0,0 @@
1
- /*
2
- ** SpeechFlow - Speech Processing Flow Graph
3
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
- */
6
-
7
- /* standard dependencies */
8
- import Stream from "node:stream"
9
- import { EventEmitter } from "node:events"
10
-
11
- /* external dependencies */
12
- import * as ElevenLabs from "elevenlabs"
13
- import { getStreamAsBuffer } from "get-stream"
14
-
15
- /* internal dependencies */
16
- import SpeechFlowNode from "./speechflow-node"
17
-
18
- /*
19
- const elevenlabsVoices = {
20
- "drew": { name: "Drew", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
21
- "george": { name: "George", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
22
- "bill": { name: "Bill", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
23
- "daniel": { name: "Daniel", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
24
- "brian": { name: "Brian", model: "eleven_turbo_v2", lang: [ "en" ] },
25
- "sarah": { name: "Sarah", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
26
- "racel": { name: "Racel", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
27
- "grace": { name: "Grace", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
28
- "matilda": { name: "Matilda", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
29
- "alice": { name: "Alice", model: "eleven_turbo_v2", lang: [ "en" ] }
30
- }
31
- */
32
-
33
- export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
34
- /* declare official node name */
35
- public static name = "elevenlabs"
36
-
37
- /* internal state */
38
- private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
39
-
40
- /* construct node */
41
- constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
42
- super(id, opts, args)
43
-
44
- /* declare node configuration parameters */
45
- this.configure({
46
- key: { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
47
- voice: { type: "string", val: "Brian", pos: 0 },
48
- language: { type: "string", val: "de", pos: 1 }
49
- })
50
-
51
- /* declare node input/output format */
52
- this.input = "text"
53
- this.output = "audio"
54
- }
55
-
56
- /* open node */
57
- async open () {
58
- this.elevenlabs = new ElevenLabs.ElevenLabsClient({
59
- apiKey: this.params.key
60
- })
61
- const voices = await this.elevenlabs.voices.getAll()
62
- const voice = voices.voices.find((voice) => voice.name === this.params.voice)
63
- if (voice === undefined)
64
- throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
65
- const speechStream = (text: string) => {
66
- return this.elevenlabs!.textToSpeech.convert(voice.voice_id, {
67
- text,
68
- optimize_streaming_latency: 2,
69
- output_format: "pcm_16000", // S16LE
70
- model_id: "eleven_flash_v2_5",
71
- /*
72
- voice_settings: {
73
- stability: 0,
74
- similarity_boost: 0
75
- }
76
- */
77
- }, {
78
- timeoutInSeconds: 30,
79
- maxRetries: 10
80
- })
81
- }
82
- const queue = new EventEmitter()
83
- this.stream = new Stream.Duplex({
84
- write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
85
- if (encoding !== "utf8" && encoding !== "utf-8")
86
- callback(new Error("only text input supported by Elevenlabs node"))
87
- const data = chunk.toString()
88
- speechStream(data).then((stream) => {
89
- getStreamAsBuffer(stream).then((buffer) => {
90
- queue.emit("audio", buffer)
91
- callback()
92
- }).catch((error) => {
93
- callback(error)
94
- })
95
- }).catch((error) => {
96
- callback(error)
97
- })
98
- },
99
- read (size: number) {
100
- queue.once("audio", (buffer: Buffer) => {
101
- this.push(buffer, "binary")
102
- })
103
- }
104
- })
105
- }
106
-
107
- /* close node */
108
- async close () {
109
- /* destroy stream */
110
- if (this.stream !== null) {
111
- this.stream.destroy()
112
- this.stream = null
113
- }
114
- }
115
- }
116
-