speechflow 1.7.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/CHANGELOG.md +23 -0
  2. package/README.md +425 -146
  3. package/etc/claude.md +5 -5
  4. package/etc/speechflow.yaml +2 -2
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-api.js +6 -5
  7. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  8. package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
  9. package/speechflow-cli/dst/speechflow-main-graph.js +35 -13
  10. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-main-status.js +3 -7
  12. package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +3 -0
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +4 -2
  16. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +4 -2
  19. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  20. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +2 -2
  21. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +1 -2
  23. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +32 -5
  25. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
  27. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +1 -6
  28. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +0 -1
  30. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +9 -9
  31. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +17 -0
  33. package/speechflow-cli/dst/speechflow-node-a2t-google.js +320 -0
  34. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
  35. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -4
  36. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  37. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +6 -11
  38. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  39. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +6 -5
  40. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
  42. package/speechflow-cli/dst/speechflow-node-t2a-google.js +218 -0
  43. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +2 -0
  45. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +19 -6
  46. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  47. package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
  48. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +195 -0
  49. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
  50. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
  51. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +608 -0
  52. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
  53. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  54. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
  55. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +159 -0
  56. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
  57. package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
  58. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
  59. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
  60. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
  61. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
  62. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
  63. package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
  64. package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +47 -99
  65. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
  66. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +3 -6
  67. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  68. package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
  69. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
  70. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
  71. package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
  72. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
  73. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
  74. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
  75. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +10 -0
  76. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  77. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  78. package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -3
  79. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  80. package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
  81. package/speechflow-cli/dst/speechflow-node-xio-exec.js +223 -0
  82. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
  83. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
  84. package/speechflow-cli/dst/speechflow-node-xio-file.js +80 -67
  85. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  86. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +2 -1
  87. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  88. package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
  89. package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
  90. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
  91. package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
  92. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +500 -0
  93. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
  94. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +2 -1
  95. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-util-audio.js +5 -6
  97. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  98. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
  99. package/speechflow-cli/dst/speechflow-util-error.js +5 -7
  100. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  101. package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
  102. package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
  103. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
  104. package/speechflow-cli/dst/speechflow-util-misc.d.ts +1 -1
  105. package/speechflow-cli/dst/speechflow-util-misc.js +4 -4
  106. package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
  107. package/speechflow-cli/dst/speechflow-util-queue.js +3 -3
  108. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  109. package/speechflow-cli/dst/speechflow-util-stream.js +4 -2
  110. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  111. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  112. package/speechflow-cli/dst/speechflow-util.js +1 -0
  113. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  114. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  115. package/speechflow-cli/package.json +34 -17
  116. package/speechflow-cli/src/lib.d.ts +5 -0
  117. package/speechflow-cli/src/speechflow-main-api.ts +6 -5
  118. package/speechflow-cli/src/speechflow-main-graph.ts +40 -13
  119. package/speechflow-cli/src/speechflow-main-status.ts +4 -8
  120. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -0
  121. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +4 -2
  122. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -1
  123. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +4 -2
  124. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +2 -2
  125. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +1 -2
  126. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +33 -6
  127. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -11
  128. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +13 -12
  129. package/speechflow-cli/src/speechflow-node-a2t-google.ts +322 -0
  130. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +8 -4
  131. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +7 -11
  132. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -5
  133. package/speechflow-cli/src/speechflow-node-t2a-google.ts +206 -0
  134. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +22 -6
  135. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +179 -0
  136. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +701 -0
  137. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +2 -1
  138. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +136 -0
  139. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
  140. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
  141. package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts} +48 -107
  142. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +3 -6
  143. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
  144. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
  145. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +16 -3
  146. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +3 -3
  147. package/speechflow-cli/src/speechflow-node-xio-device.ts +4 -7
  148. package/speechflow-cli/src/speechflow-node-xio-exec.ts +210 -0
  149. package/speechflow-cli/src/speechflow-node-xio-file.ts +93 -80
  150. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +3 -2
  151. package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
  152. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +533 -0
  153. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +2 -1
  154. package/speechflow-cli/src/speechflow-util-audio-wt.ts +4 -4
  155. package/speechflow-cli/src/speechflow-util-audio.ts +10 -10
  156. package/speechflow-cli/src/speechflow-util-error.ts +9 -7
  157. package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
  158. package/speechflow-cli/src/speechflow-util-misc.ts +4 -4
  159. package/speechflow-cli/src/speechflow-util-queue.ts +4 -4
  160. package/speechflow-cli/src/speechflow-util-stream.ts +5 -3
  161. package/speechflow-cli/src/speechflow-util.ts +1 -0
  162. package/speechflow-ui-db/package.json +9 -9
  163. package/speechflow-ui-st/package.json +9 -9
  164. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
  165. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
  166. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
  167. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
  168. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
  169. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
package/speechflow-cli/src/speechflow-node-a2t-amazon.ts

@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
  public static name = "a2t-amazon"

  /* internal state */
- private client: TranscribeStreamingClient | null = null
- private clientStream: AsyncIterable<TranscriptResultStream> | null = null
- private closing = false
- private initTimeout: ReturnType<typeof setTimeout> | null = null
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
+ private client: TranscribeStreamingClient | null = null
+ private clientStream: AsyncIterable<TranscriptResultStream> | null = null
+ private closing = false
+ private connectionTimeout: ReturnType<typeof setTimeout> | null = null
  private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null

  /* construct node */
@@ -194,8 +193,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
  this.queue?.write(chunk)
  }
  }
- })().catch((err: Error) => {
- this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${err}`)
+ })().catch((err: unknown) => {
+ this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${util.ensureError(err).message}`)
  })
  }

@@ -281,10 +280,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
  this.closing = true

  /* cleanup all timers */
- if (this.initTimeout !== null) {
- clearTimeout(this.initTimeout)
- this.initTimeout = null
- }
  if (this.connectionTimeout !== null) {
  clearTimeout(this.connectionTimeout)
  this.connectionTimeout = null
package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts

@@ -21,10 +21,9 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
  public static name = "a2t-deepgram"

  /* internal state */
- private dg: Deepgram.LiveClient | null = null
- private closing = false
- private initTimeout: ReturnType<typeof setTimeout> | null = null
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
+ private dg: Deepgram.LiveClient | null = null
+ private closing = false
+ private connectionTimeout: ReturnType<typeof setTimeout> | null = null
  private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null

  /* construct node */
@@ -41,6 +40,10 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
  interim: { type: "boolean", val: false, pos: 3 }
  })

+ /* sanity check parameters */
+ if (!this.params.key)
+ throw new Error("Deepgram API key not configured")
+
  /* declare node input/output format */
  this.input = "audio"
  this.output = "text"
@@ -126,7 +129,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
  this.log("info", `text received (start: ${data.start}s, ` +
  `duration: ${data.duration.toFixed(2)}s, ` +
  `kind: ${isFinal ? "final" : "intermediate"}): ` +
- `${text}"`)
+ `"${text}"`)
  const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
  const end = start.plus({ seconds: data.duration })
  const metas = metastore.fetch(start, end)
@@ -163,14 +166,16 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
  this.log("error", `error: ${error.message}`)
  if (!this.closing && this.queue !== null)
  this.queue.write(null)
- this.emit("error")
+ this.emit("error", error)
  })

  /* wait for Deepgram API to be available */
  await new Promise((resolve, reject) => {
  this.connectionTimeout = setTimeout(() => {
- this.connectionTimeout = null
- reject(new Error("Deepgram: timeout waiting for connection open"))
+ if (this.connectionTimeout !== null) {
+ this.connectionTimeout = null
+ reject(new Error("Deepgram: timeout waiting for connection open"))
+ }
  }, 8000)
  this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
  this.log("info", "connection open")
@@ -271,10 +276,6 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
  this.closing = true

  /* cleanup all timers */
- if (this.initTimeout !== null) {
- clearTimeout(this.initTimeout)
- this.initTimeout = null
- }
  if (this.connectionTimeout !== null) {
  clearTimeout(this.connectionTimeout)
  this.connectionTimeout = null
package/speechflow-cli/src/speechflow-node-a2t-google.ts

@@ -0,0 +1,322 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import Stream from "node:stream"
+
+ /* external dependencies */
+ import * as GoogleSpeech from "@google-cloud/speech"
+ import { DateTime, Duration } from "luxon"
+ import * as arktype from "arktype"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+ import * as util from "./speechflow-util"
+
+ /* SpeechFlow node for Google Cloud speech-to-text conversion */
+ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
+ /* declare official node name */
+ public static name = "a2t-google"
+
+ /* internal state */
+ private client: GoogleSpeech.SpeechClient | null = null
+ private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
+ private connectionTimeout: ReturnType<typeof setTimeout> | null = null
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
+ private closing = false
+
+ /* construct node */
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+ super(id, cfg, opts, args)
+
+ /* declare node configuration parameters */
+ this.configure({
+ key: { type: "string", val: process.env.SPEECHFLOW_GOOGLE_KEY ?? "" },
+ model: { type: "string", pos: 0, val: "latest_long" },
+ language: { type: "string", pos: 1, val: "en-US" },
+ interim: { type: "boolean", pos: 2, val: false }
+ })
+
+ /* validate API key */
+ if (this.params.key === "")
+ throw new Error("Google Cloud API credentials JSON key is required")
+
+ /* declare node input/output format */
+ this.input = "audio"
+ this.output = "text"
+ }
+
+ /* one-time status of node */
+ async status () {
+ return {}
+ }
+
+ /* open node */
+ async open () {
+ /* sanity check situation */
+ if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+ throw new Error("Google Speech node currently supports PCM-S16LE audio only")
+
+ /* clear destruction flag */
+ this.closing = false
+
+ /* create queue for results */
+ this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+
+ /* create a store for the meta information */
+ const metastore = new util.TimeStore<Map<string, any>>()
+
+ /* instantiate Google Speech client */
+ const data = util.run("Google Cloud API credentials key", () =>
+ JSON.parse(this.params.key))
+ const credentials = util.importObject("Google Cloud API credentials key",
+ data,
+ arktype.type({
+ project_id: "string",
+ private_key: "string",
+ client_email: "string"
+ })
+ )
+ this.client = new GoogleSpeech.SpeechClient({
+ credentials: {
+ private_key: credentials.private_key,
+ client_email: credentials.client_email
+ },
+ projectId: credentials.project_id
+ })
+
+ /* create streaming recognition request */
+ this.recognizeStream = this.client.streamingRecognize({
+ config: {
+ encoding: "LINEAR16",
+ sampleRateHertz: this.config.audioSampleRate,
+ languageCode: this.params.language,
+ model: this.params.model,
+ enableAutomaticPunctuation: true,
+ enableWordTimeOffsets: true
+ },
+ interimResults: this.params.interim
+ })
+
+ /* hook onto Google Speech API events */
+ this.recognizeStream.on("data", (data: GoogleSpeech.protos.google.cloud.speech.v1.IStreamingRecognizeResponse) => {
+ if (this.closing || this.queue === null)
+ return
+ if (!data.results || data.results.length === 0)
+ return
+ for (const result of data.results) {
+ if (!result.alternatives || result.alternatives.length === 0)
+ continue
+ const alternative = result.alternatives[0]
+ const text = alternative.transcript ?? ""
+ if (text === "")
+ continue
+ const isFinal = result.isFinal ?? false
+ if (!isFinal && !this.params.interim)
+ continue
+
+ /* calculate timestamps */
+ let tsStart = Duration.fromMillis(0)
+ let tsEnd = Duration.fromMillis(0)
+
+ /* extract word timing information if available */
+ const words: { word: string, start: Duration, end: Duration }[] = []
+ if (alternative.words && alternative.words.length > 0) {
+ for (const wordInfo of alternative.words) {
+ const wordStart = wordInfo.startTime
+ ? Duration.fromMillis(
+ (Number(wordInfo.startTime.seconds ?? 0) * 1000) +
+ (Number(wordInfo.startTime.nanos ?? 0) / 1000000)
+ ).plus(this.timeZeroOffset)
+ : Duration.fromMillis(0)
+ const wordEnd = wordInfo.endTime
+ ? Duration.fromMillis(
+ (Number(wordInfo.endTime.seconds ?? 0) * 1000) +
+ (Number(wordInfo.endTime.nanos ?? 0) / 1000000)
+ ).plus(this.timeZeroOffset)
+ : Duration.fromMillis(0)
+ words.push({
+ word: wordInfo.word ?? "",
+ start: wordStart,
+ end: wordEnd
+ })
+ }
+ if (words.length > 0) {
+ tsStart = words[0].start
+ tsEnd = words[words.length - 1].end
+ }
+ }
+ else {
+ /* fallback: use result timing */
+ const resultEnd = result.resultEndTime
+ if (resultEnd) {
+ tsEnd = Duration.fromMillis(
+ (Number(resultEnd.seconds ?? 0) * 1000) +
+ (Number(resultEnd.nanos ?? 0) / 1000000)
+ ).plus(this.timeZeroOffset)
+ }
+ }
+ this.log("info", `text received (start: ${tsStart.toMillis()}ms, ` +
+ `end: ${tsEnd.toMillis()}ms, ` +
+ `kind: ${isFinal ? "final" : "intermediate"}): ` +
+ `"${text}"`)
+
+ /* fetch and merge meta information */
+ const metas = metastore.fetch(tsStart, tsEnd)
+ const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+ curr.forEach((val, key) => { prev.set(key, val) })
+ return prev
+ }, new Map<string, any>())
+ metastore.prune(tsStart)
+
+ /* add word timing to meta */
+ if (words.length > 0)
+ meta.set("words", words)
+
+ /* create and enqueue chunk */
+ const chunk = new SpeechFlowChunk(tsStart, tsEnd,
+ isFinal ? "final" : "intermediate", "text", text, meta)
+ this.queue.write(chunk)
+ }
+ })
+ this.recognizeStream.on("error", (error: Error) => {
+ this.log("error", `error: ${error.message}`)
+ if (!this.closing && this.queue !== null)
+ this.queue.write(null)
+ this.emit("error", error)
+ })
+ this.recognizeStream.on("end", () => {
+ this.log("info", "stream ended")
+ if (!this.closing && this.queue !== null)
+ this.queue.write(null)
+ })
+
+ /* remember opening time to receive time zero offset */
+ this.timeOpen = DateTime.now()
+
+ /* provide Duplex stream and internally attach to Google Speech API */
+ const self = this
+ const reads = new util.PromiseSet<void>()
+ this.stream = new Stream.Duplex({
+ writableObjectMode: true,
+ readableObjectMode: true,
+ decodeStrings: false,
+ highWaterMark: 1,
+ write (chunk: SpeechFlowChunk, encoding, callback) {
+ if (self.closing || self.recognizeStream === null) {
+ callback(new Error("stream already destroyed"))
+ return
+ }
+ if (chunk.type !== "audio")
+ callback(new Error("expected audio input chunk"))
+ else if (!Buffer.isBuffer(chunk.payload))
+ callback(new Error("expected Buffer input chunk"))
+ else {
+ if (chunk.payload.byteLength > 0) {
+ self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
+ if (chunk.meta.size > 0)
+ metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
+ try {
+ self.recognizeStream.write(chunk.payload)
+ }
+ catch (error) {
+ callback(util.ensureError(error, "failed to send to Google Speech"))
+ return
+ }
+ }
+ callback()
+ }
+ },
+ async final (callback) {
+ /* short-circuiting in case of own closing */
+ if (self.closing || self.recognizeStream === null) {
+ callback()
+ return
+ }
+
+ /* close Google Speech stream */
+ try {
+ self.recognizeStream.end()
+ }
+ catch (error) {
+ self.log("warning", `error closing Google Speech stream: ${error}`)
+ }
+
+ /* await all read operations */
+ await reads.awaitAll()
+ callback()
+ },
+ read (size) {
+ if (self.closing || self.queue === null) {
+ this.push(null)
+ return
+ }
+ reads.add(self.queue.read().then((chunk) => {
+ if (self.closing || self.queue === null) {
+ this.push(null)
+ return
+ }
+ if (chunk === null) {
+ self.log("info", "received EOF signal")
+ this.push(null)
+ }
+ else {
+ self.log("debug", `received data (${chunk.payload.length} bytes)`)
+ this.push(chunk)
+ }
+ }).catch((error: unknown) => {
+ if (!self.closing && self.queue !== null)
+ self.log("error", `queue read error: ${util.ensureError(error).message}`)
+ }))
+ }
+ })
+ }
+
+ /* close node */
+ async close () {
+ /* indicate closing first to stop all async operations */
+ this.closing = true
+
+ /* cleanup all timers */
+ if (this.connectionTimeout !== null) {
+ clearTimeout(this.connectionTimeout)
+ this.connectionTimeout = null
+ }
+
+ /* shutdown stream */
+ if (this.stream !== null) {
+ await util.destroyStream(this.stream)
+ this.stream = null
+ }
+
+ /* close Google Speech stream and client */
+ if (this.recognizeStream !== null) {
+ try {
+ this.recognizeStream.removeAllListeners()
+ this.recognizeStream.destroy()
+ }
+ catch (error) {
+ this.log("warning", `error during Google Speech stream cleanup: ${error}`)
+ }
+ this.recognizeStream = null
+ }
+ if (this.client !== null) {
+ try {
+ await this.client.close()
+ }
+ catch (error) {
+ this.log("warning", `error closing Google Speech client: ${error}`)
+ }
+ this.client = null
+ }
+
+ /* signal EOF to any pending read operations */
+ if (this.queue !== null) {
+ this.queue.write(null)
+ this.queue = null
+ }
+ }
+ }
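Note on the new a2t-google node above: its key parameter is read from the SPEECHFLOW_GOOGLE_KEY environment variable, parsed as JSON and validated against the arktype schema in open(), so the value has to be a Google Cloud service-account key carrying at least project_id, private_key and client_email. A minimal TypeScript sketch of such a value, using purely hypothetical placeholder data:

/* hypothetical service-account key for illustration only: the field names
   match the arktype schema above, the values are placeholders */
const exampleKey = {
    project_id:   "my-gcp-project",
    private_key:  "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
    client_email: "speechflow@my-gcp-project.iam.gserviceaccount.com"
}
process.env.SPEECHFLOW_GOOGLE_KEY = JSON.stringify(exampleKey)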
package/speechflow-cli/src/speechflow-node-a2t-openai.ts

@@ -43,6 +43,10 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
  interim: { type: "boolean", val: false }
  })

+ /* sanity check parameters */
+ if (!this.params.key)
+ throw new Error("OpenAI API key not configured")
+
  /* declare node input/output format */
  this.input = "audio"
  this.output = "text"
@@ -349,10 +353,10 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
  this.ws.close()
  this.ws = null
  }
- if (this.openai !== null)
- this.openai = null
- if (this.resampler !== null)
- this.resampler = null
+ this.openai = null
+
+ /* close resampler */
+ this.resampler = null

  /* shutdown stream */
  if (this.stream !== null) {
package/speechflow-cli/src/speechflow-node-t2a-amazon.ts

@@ -83,7 +83,7 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
  "Ruth": { language: "en", languageCode: "en-US", engine: "generative" },
  "Stephen": { language: "en", languageCode: "en-US", engine: "generative" },
  "Vicki": { language: "de", languageCode: "de-DE", engine: "generative" },
- "Daniel": { language: "de", languageCode: "de-DE", engine: "generative" },
+ "Daniel": { language: "de", languageCode: "de-DE", engine: "generative" }
  }
  const voiceConfig = voices[this.params.voice as keyof typeof voices]
  if (voiceConfig === undefined)
@@ -147,11 +147,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
  callback()
  },
  final (callback) {
- if (self.closing) {
- callback()
- return
- }
- this.push(null)
  callback()
  }
  })
@@ -162,6 +157,12 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
  /* indicate closing */
  this.closing = true

+ /* shutdown stream */
+ if (this.stream !== null) {
+ await util.destroyStream(this.stream)
+ this.stream = null
+ }
+
  /* destroy resampler */
  if (this.resampler !== null)
  this.resampler = null
@@ -171,11 +172,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
  this.client.destroy()
  this.client = null
  }
- /* shutdown stream */
- if (this.stream !== null) {
- await util.destroyStream(this.stream)
- this.stream = null
- }
  }
  }

package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts

@@ -103,14 +103,15 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
  throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
  }
  const labels = voice.labels ?? {}
- const info = Object.keys(labels).length > 0 ?
- ", " + Object.entries(labels).map(([ key, val ]) => `${key}: "${val}"`).join(", ") : ""
+ const info = Object.keys(labels).length > 0
+ ? ", " + Object.entries(labels).map(([ key, val ]) => `${key}: "${val}"`).join(", ")
+ : ""
  this.log("info", `selected voice: name: "${voice.name}"${info}`)

  /* perform text-to-speech operation with Elevenlabs API */
- const model = this.params.optimize === "quality" ?
- "eleven_turbo_v2_5" :
- "eleven_flash_v2_5"
+ const model = this.params.optimize === "quality"
+ ? "eleven_turbo_v2_5"
+ : "eleven_flash_v2_5"
  const speechStream = (text: string) => {
  this.log("info", `ElevenLabs: send text "${text}"`)
  return this.elevenlabs!.textToSpeech.convert(voice.voiceId, {