speechflow 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/CHANGELOG.md +23 -0
  2. package/etc/stx.conf +54 -58
  3. package/package.json +25 -106
  4. package/{etc → speechflow-cli/etc}/eslint.mjs +1 -2
  5. package/speechflow-cli/etc/stx.conf +77 -0
  6. package/speechflow-cli/package.json +116 -0
  7. package/{src → speechflow-cli/src}/speechflow-node-a2a-gender.ts +148 -64
  8. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +217 -0
  9. package/{src → speechflow-cli/src}/speechflow-node-a2a-mute.ts +39 -11
  10. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +384 -0
  11. package/{src → speechflow-cli/src}/speechflow-node-a2a-wav.ts +27 -11
  12. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +313 -0
  13. package/{src → speechflow-cli/src}/speechflow-node-t2a-elevenlabs.ts +59 -12
  14. package/{src → speechflow-cli/src}/speechflow-node-t2a-kokoro.ts +11 -4
  15. package/{src → speechflow-cli/src}/speechflow-node-t2t-deepl.ts +9 -4
  16. package/{src → speechflow-cli/src}/speechflow-node-t2t-format.ts +2 -2
  17. package/{src → speechflow-cli/src}/speechflow-node-t2t-ollama.ts +1 -1
  18. package/{src → speechflow-cli/src}/speechflow-node-t2t-openai.ts +1 -1
  19. package/{src → speechflow-cli/src}/speechflow-node-t2t-sentence.ts +37 -20
  20. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +276 -0
  21. package/{src → speechflow-cli/src}/speechflow-node-t2t-transformers.ts +4 -3
  22. package/{src → speechflow-cli/src}/speechflow-node-x2x-filter.ts +9 -5
  23. package/{src → speechflow-cli/src}/speechflow-node-x2x-trace.ts +16 -8
  24. package/{src → speechflow-cli/src}/speechflow-node-xio-device.ts +12 -8
  25. package/{src → speechflow-cli/src}/speechflow-node-xio-file.ts +9 -3
  26. package/{src → speechflow-cli/src}/speechflow-node-xio-mqtt.ts +5 -2
  27. package/{src → speechflow-cli/src}/speechflow-node-xio-websocket.ts +12 -12
  28. package/{src → speechflow-cli/src}/speechflow-node.ts +7 -0
  29. package/{src → speechflow-cli/src}/speechflow-utils.ts +78 -44
  30. package/{src → speechflow-cli/src}/speechflow.ts +188 -53
  31. package/speechflow-ui-db/etc/eslint.mjs +106 -0
  32. package/speechflow-ui-db/etc/htmllint.json +55 -0
  33. package/speechflow-ui-db/etc/stx.conf +79 -0
  34. package/speechflow-ui-db/etc/stylelint.js +46 -0
  35. package/speechflow-ui-db/etc/stylelint.yaml +33 -0
  36. package/speechflow-ui-db/etc/tsc-client.json +30 -0
  37. package/speechflow-ui-db/etc/tsc.node.json +9 -0
  38. package/speechflow-ui-db/etc/vite-client.mts +63 -0
  39. package/speechflow-ui-db/package.d/htmllint-cli+0.0.7.patch +20 -0
  40. package/speechflow-ui-db/package.json +75 -0
  41. package/speechflow-ui-db/src/app-icon.ai +1989 -4
  42. package/speechflow-ui-db/src/app-icon.svg +26 -0
  43. package/speechflow-ui-db/src/app.styl +64 -0
  44. package/speechflow-ui-db/src/app.vue +221 -0
  45. package/speechflow-ui-db/src/index.html +23 -0
  46. package/speechflow-ui-db/src/index.ts +26 -0
  47. package/{dst/speechflow.d.ts → speechflow-ui-db/src/lib.d.ts} +5 -3
  48. package/speechflow-ui-db/src/tsconfig.json +3 -0
  49. package/speechflow-ui-st/etc/eslint.mjs +106 -0
  50. package/speechflow-ui-st/etc/htmllint.json +55 -0
  51. package/speechflow-ui-st/etc/stx.conf +79 -0
  52. package/speechflow-ui-st/etc/stylelint.js +46 -0
  53. package/speechflow-ui-st/etc/stylelint.yaml +33 -0
  54. package/speechflow-ui-st/etc/tsc-client.json +30 -0
  55. package/speechflow-ui-st/etc/tsc.node.json +9 -0
  56. package/speechflow-ui-st/etc/vite-client.mts +63 -0
  57. package/speechflow-ui-st/package.d/htmllint-cli+0.0.7.patch +20 -0
  58. package/speechflow-ui-st/package.json +79 -0
  59. package/speechflow-ui-st/src/app-icon.ai +1989 -4
  60. package/speechflow-ui-st/src/app-icon.svg +26 -0
  61. package/speechflow-ui-st/src/app.styl +64 -0
  62. package/speechflow-ui-st/src/app.vue +142 -0
  63. package/speechflow-ui-st/src/index.html +23 -0
  64. package/speechflow-ui-st/src/index.ts +26 -0
  65. package/speechflow-ui-st/src/lib.d.ts +9 -0
  66. package/speechflow-ui-st/src/tsconfig.json +3 -0
  67. package/dst/speechflow-node-a2a-ffmpeg.d.ts +0 -13
  68. package/dst/speechflow-node-a2a-ffmpeg.js +0 -153
  69. package/dst/speechflow-node-a2a-ffmpeg.js.map +0 -1
  70. package/dst/speechflow-node-a2a-gender.d.ts +0 -18
  71. package/dst/speechflow-node-a2a-gender.js +0 -271
  72. package/dst/speechflow-node-a2a-gender.js.map +0 -1
  73. package/dst/speechflow-node-a2a-meter.d.ts +0 -12
  74. package/dst/speechflow-node-a2a-meter.js +0 -155
  75. package/dst/speechflow-node-a2a-meter.js.map +0 -1
  76. package/dst/speechflow-node-a2a-mute.d.ts +0 -16
  77. package/dst/speechflow-node-a2a-mute.js +0 -91
  78. package/dst/speechflow-node-a2a-mute.js.map +0 -1
  79. package/dst/speechflow-node-a2a-vad.d.ts +0 -16
  80. package/dst/speechflow-node-a2a-vad.js +0 -285
  81. package/dst/speechflow-node-a2a-vad.js.map +0 -1
  82. package/dst/speechflow-node-a2a-wav.d.ts +0 -11
  83. package/dst/speechflow-node-a2a-wav.js +0 -195
  84. package/dst/speechflow-node-a2a-wav.js.map +0 -1
  85. package/dst/speechflow-node-a2t-deepgram.d.ts +0 -15
  86. package/dst/speechflow-node-a2t-deepgram.js +0 -255
  87. package/dst/speechflow-node-a2t-deepgram.js.map +0 -1
  88. package/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -16
  89. package/dst/speechflow-node-t2a-elevenlabs.js +0 -195
  90. package/dst/speechflow-node-t2a-elevenlabs.js.map +0 -1
  91. package/dst/speechflow-node-t2a-kokoro.d.ts +0 -13
  92. package/dst/speechflow-node-t2a-kokoro.js +0 -149
  93. package/dst/speechflow-node-t2a-kokoro.js.map +0 -1
  94. package/dst/speechflow-node-t2t-deepl.d.ts +0 -15
  95. package/dst/speechflow-node-t2t-deepl.js +0 -142
  96. package/dst/speechflow-node-t2t-deepl.js.map +0 -1
  97. package/dst/speechflow-node-t2t-format.d.ts +0 -11
  98. package/dst/speechflow-node-t2t-format.js +0 -82
  99. package/dst/speechflow-node-t2t-format.js.map +0 -1
  100. package/dst/speechflow-node-t2t-ollama.d.ts +0 -13
  101. package/dst/speechflow-node-t2t-ollama.js +0 -247
  102. package/dst/speechflow-node-t2t-ollama.js.map +0 -1
  103. package/dst/speechflow-node-t2t-openai.d.ts +0 -13
  104. package/dst/speechflow-node-t2t-openai.js +0 -227
  105. package/dst/speechflow-node-t2t-openai.js.map +0 -1
  106. package/dst/speechflow-node-t2t-sentence.d.ts +0 -17
  107. package/dst/speechflow-node-t2t-sentence.js +0 -234
  108. package/dst/speechflow-node-t2t-sentence.js.map +0 -1
  109. package/dst/speechflow-node-t2t-subtitle.d.ts +0 -13
  110. package/dst/speechflow-node-t2t-subtitle.js +0 -278
  111. package/dst/speechflow-node-t2t-subtitle.js.map +0 -1
  112. package/dst/speechflow-node-t2t-transformers.d.ts +0 -14
  113. package/dst/speechflow-node-t2t-transformers.js +0 -265
  114. package/dst/speechflow-node-t2t-transformers.js.map +0 -1
  115. package/dst/speechflow-node-x2x-filter.d.ts +0 -11
  116. package/dst/speechflow-node-x2x-filter.js +0 -117
  117. package/dst/speechflow-node-x2x-filter.js.map +0 -1
  118. package/dst/speechflow-node-x2x-trace.d.ts +0 -11
  119. package/dst/speechflow-node-x2x-trace.js +0 -111
  120. package/dst/speechflow-node-x2x-trace.js.map +0 -1
  121. package/dst/speechflow-node-xio-device.d.ts +0 -13
  122. package/dst/speechflow-node-xio-device.js +0 -226
  123. package/dst/speechflow-node-xio-device.js.map +0 -1
  124. package/dst/speechflow-node-xio-file.d.ts +0 -11
  125. package/dst/speechflow-node-xio-file.js +0 -210
  126. package/dst/speechflow-node-xio-file.js.map +0 -1
  127. package/dst/speechflow-node-xio-mqtt.d.ts +0 -13
  128. package/dst/speechflow-node-xio-mqtt.js +0 -185
  129. package/dst/speechflow-node-xio-mqtt.js.map +0 -1
  130. package/dst/speechflow-node-xio-websocket.d.ts +0 -13
  131. package/dst/speechflow-node-xio-websocket.js +0 -278
  132. package/dst/speechflow-node-xio-websocket.js.map +0 -1
  133. package/dst/speechflow-node.d.ts +0 -65
  134. package/dst/speechflow-node.js +0 -180
  135. package/dst/speechflow-node.js.map +0 -1
  136. package/dst/speechflow-utils.d.ts +0 -69
  137. package/dst/speechflow-utils.js +0 -486
  138. package/dst/speechflow-utils.js.map +0 -1
  139. package/dst/speechflow.js +0 -768
  140. package/dst/speechflow.js.map +0 -1
  141. package/src/speechflow-node-a2a-meter.ts +0 -130
  142. package/src/speechflow-node-a2a-vad.ts +0 -285
  143. package/src/speechflow-node-a2t-deepgram.ts +0 -234
  144. package/src/speechflow-node-t2t-subtitle.ts +0 -149
  145. /package/{etc → speechflow-cli/etc}/biome.jsonc +0 -0
  146. /package/{etc → speechflow-cli/etc}/oxlint.jsonc +0 -0
  147. /package/{etc → speechflow-cli/etc}/speechflow.bat +0 -0
  148. /package/{etc → speechflow-cli/etc}/speechflow.sh +0 -0
  149. /package/{etc → speechflow-cli/etc}/speechflow.yaml +0 -0
  150. /package/{etc → speechflow-cli/etc}/tsconfig.json +0 -0
  151. /package/{package.d → speechflow-cli/package.d}/@ericedouard+vad-node-realtime+0.2.0.patch +0 -0
  152. /package/{src → speechflow-cli/src}/lib.d.ts +0 -0
  153. /package/{src → speechflow-cli/src}/speechflow-logo.ai +0 -0
  154. /package/{src → speechflow-cli/src}/speechflow-logo.svg +0 -0
  155. /package/{src → speechflow-cli/src}/speechflow-node-a2a-ffmpeg.ts +0 -0
  156. /package/{tsconfig.json → speechflow-cli/tsconfig.json} +0 -0
package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts
@@ -0,0 +1,313 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import Stream from "node:stream"
+
+ /* external dependencies */
+ import * as Deepgram from "@deepgram/sdk"
+ import { DateTime, Duration } from "luxon"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+ import * as utils from "./speechflow-utils"
+
+ /* SpeechFlow node for Deepgram speech-to-text conversion */
+ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
+     /* declare official node name */
+     public static name = "deepgram"
+
+     /* internal state */
+     private dg: Deepgram.LiveClient | null = null
+     private destroyed = false
+     private initTimeout: ReturnType<typeof setTimeout> | null = null
+     private connectionTimeout: ReturnType<typeof setTimeout> | null = null
+     private queue: utils.SingleQueue<SpeechFlowChunk | null> | null = null
+
+     /* construct node */
+     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+         super(id, cfg, opts, args)
+
+         /* declare node configuration parameters */
+         this.configure({
+             key: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY },
+             keyAdm: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM },
+             model: { type: "string", val: "nova-2", pos: 0 },
+             version: { type: "string", val: "latest", pos: 1 },
+             language: { type: "string", val: "multi", pos: 2 },
+             interim: { type: "boolean", val: false, pos: 3 }
+         })
+
+         /* declare node input/output format */
+         this.input = "audio"
+         this.output = "text"
+     }
+
+     /* one-time status of node */
+     async status () {
+         let balance = 0
+         try {
+             const deepgram = Deepgram.createClient(this.params.keyAdm)
+             const response = await deepgram.manage.getProjects()
+             if (response !== null && response.error === null) {
+                 for (const project of response.result.projects) {
+                     const response = await deepgram.manage.getProjectBalances(project.project_id)
+                     if (response !== null && response.error === null)
+                         balance += response.result.balances[0]?.amount ?? 0
+                 }
+             }
+         }
+         catch (error) {
+             this.log("warning", `failed to fetch balance: ${error}`)
+         }
+         return { balance: balance.toFixed(2) }
+     }
+
+     /* open node */
+     async open () {
+         /* sanity check situation */
+         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+             throw new Error("Deepgram node currently supports PCM-S16LE audio only")
+
+         /* clear destruction flag */
+         this.destroyed = false
+
+         /* create queue for results */
+         this.queue = new utils.SingleQueue<SpeechFlowChunk | null>()
+
+         /* create a store for the meta information */
+         const metastore = new utils.TimeStore<Map<string, any>>()
+
+         /* connect to Deepgram API */
+         const deepgram = Deepgram.createClient(this.params.key)
+         let language = "en"
+         if (this.params.model.match(/^nova-2/) && this.params.language !== "en")
+             language = this.params.language
+         else if (this.params.model.match(/^nova-3/) && this.params.language !== "en")
+             language = "multi"
+         this.dg = deepgram.listen.live({
+             mip_opt_out: true,
+             model: this.params.model,
+             version: this.params.version,
+             language,
+             channels: this.config.audioChannels,
+             sample_rate: this.config.audioSampleRate,
+             encoding: "linear16",
+             multichannel: false,
+             endpointing: false,
+             interim_results: this.params.interim,
+             smart_format: true,
+             punctuate: true,
+             filler_words: true,
+             numerals: true,
+             diarize: false,
+             profanity_filter: false,
+             redact: false
+         })
+
+         /* hook onto Deepgram API events */
+         this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
+             if (this.destroyed || this.queue === null)
+                 return
+             const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
+             const words = (data.channel?.alternatives[0]?.words ?? []) as
+                 { word: string, punctuated_word?: string, start: number, end: number }[]
+             const isFinal = (data.is_final ?? false) as boolean
+             if (text === "")
+                 this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
+             else {
+                 this.log("info", `text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s): "${text}"`)
+                 const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
+                 const end = start.plus({ seconds: data.duration })
+                 const metas = metastore.fetch(start, end)
+                 const meta = metas.reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+                     curr.forEach((val, key) => { prev.set(key, val) })
+                     return prev
+                 }, new Map<string, any>())
+                 metastore.prune(start)
+                 meta.set("words", words.map((word) => {
+                     const start = Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset)
+                     const end = Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset)
+                     return { word: word.punctuated_word ?? word.word, start, end }
+                 }))
+                 const chunk = new SpeechFlowChunk(start, end,
+                     isFinal ? "final" : "intermediate", "text", text, meta)
+                 this.queue.write(chunk)
+             }
+         })
+         this.dg.on(Deepgram.LiveTranscriptionEvents.SpeechStarted, (data) => {
+             this.log("info", "speech started", data)
+         })
+         this.dg.on(Deepgram.LiveTranscriptionEvents.UtteranceEnd, (data) => {
+             this.log("info", "utterance end received", data)
+         })
+         this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
+             this.log("info", "metadata received")
+         })
+         this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
+             this.log("info", "connection close")
+             if (!this.destroyed && this.queue !== null)
+                 this.queue.write(null)
+         })
+         this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
+             this.log("error", `error: ${error.message}`)
+             if (!this.destroyed && this.queue !== null)
+                 this.queue.write(null)
+             this.emit("error")
+         })
+
+         /* wait for Deepgram API to be available */
+         await new Promise((resolve, reject) => {
+             this.connectionTimeout = setTimeout(() => {
+                 if (this.connectionTimeout !== null) {
+                     this.connectionTimeout = null
+                     reject(new Error("Deepgram: timeout waiting for connection open"))
+                 }
+             }, 8000)
+             this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
+                 this.log("info", "connection open")
+                 if (this.connectionTimeout !== null) {
+                     clearTimeout(this.connectionTimeout)
+                     this.connectionTimeout = null
+                 }
+                 resolve(true)
+             })
+         })
+
+         /* remember opening time to receive time zero offset */
+         this.timeOpen = DateTime.now()
+
+         /* provide Duplex stream and internally attach to Deepgram API */
+         const self = this
+         this.stream = new Stream.Duplex({
+             writableObjectMode: true,
+             readableObjectMode: true,
+             decodeStrings: false,
+             highWaterMark: 1,
+             write (chunk: SpeechFlowChunk, encoding, callback) {
+                 if (self.destroyed || self.dg === null) {
+                     callback(new Error("stream already destroyed"))
+                     return
+                 }
+                 if (chunk.type !== "audio")
+                     callback(new Error("expected audio input chunk"))
+                 else if (!Buffer.isBuffer(chunk.payload))
+                     callback(new Error("expected Buffer input chunk"))
+                 else {
+                     if (chunk.payload.byteLength > 0) {
+                         self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
+                         if (chunk.meta.size > 0)
+                             metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
+                         try {
+                             self.dg.send(chunk.payload.buffer) /* intentionally discard all time information */
+                         }
+                         catch (error) {
+                             callback(error instanceof Error ? error : new Error("failed to send to Deepgram"))
+                             return
+                         }
+                     }
+                     callback()
+                 }
+             },
+             read (size) {
+                 if (self.destroyed || self.queue === null) {
+                     this.push(null)
+                     return
+                 }
+                 let readTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
+                     if (readTimeout !== null) {
+                         readTimeout = null
+                         if (!self.destroyed) {
+                             self.log("warning", "read timeout - pushing null to prevent hanging")
+                             this.push(null)
+                         }
+                     }
+                 }, 30 * 1000)
+                 self.queue.read().then((chunk) => {
+                     if (readTimeout !== null) {
+                         clearTimeout(readTimeout)
+                         readTimeout = null
+                     }
+                     if (self.destroyed) {
+                         this.push(null)
+                         return
+                     }
+                     if (chunk === null) {
+                         self.log("info", "received EOF signal")
+                         this.push(null)
+                     }
+                     else {
+                         self.log("debug", `received data (${chunk.payload.length} bytes)`)
+                         this.push(chunk, self.config.textEncoding)
+                     }
+                 }).catch((error) => {
+                     if (readTimeout !== null) {
+                         clearTimeout(readTimeout)
+                         readTimeout = null
+                     }
+                     if (!self.destroyed) {
+                         self.log("error", `queue read error: ${error.message}`)
+                         this.push(null)
+                     }
+                 })
+             },
+             final (callback) {
+                 if (self.destroyed || self.dg === null) {
+                     callback()
+                     return
+                 }
+                 try {
+                     self.dg.requestClose()
+                 }
+                 catch (error) {
+                     self.log("warning", `error closing Deepgram connection: ${error}`)
+                 }
+                 /* NOTICE: do not push null here -- let the Deepgram close event handle it */
+                 callback()
+             }
+         })
+     }
+
+     /* close node */
+     async close () {
+         /* indicate destruction first to stop all async operations */
+         this.destroyed = true
+
+         /* cleanup all timers */
+         if (this.initTimeout !== null) {
+             clearTimeout(this.initTimeout)
+             this.initTimeout = null
+         }
+         if (this.connectionTimeout !== null) {
+             clearTimeout(this.connectionTimeout)
+             this.connectionTimeout = null
+         }
+
+         /* close stream */
+         if (this.stream !== null) {
+             this.stream.destroy()
+             this.stream = null
+         }
+
+         /* close Deepgram connection and remove listeners */
+         if (this.dg !== null) {
+             try {
+                 this.dg.removeAllListeners()
+                 this.dg.requestClose()
+             }
+             catch (error) {
+                 this.log("warning", `error during Deepgram cleanup: ${error}`)
+             }
+             this.dg = null
+         }
+
+         /* signal EOF to any pending read operations */
+         if (this.queue !== null) {
+             this.queue.write(null)
+             this.queue = null
+         }
+     }
+ }
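
Note on the new Deepgram node above: its event handlers push transcripts into utils.SingleQueue, which the Duplex read() side drains with a promise-returning read(), using null as the EOF marker. The queue utility itself lives in speechflow-utils.ts and is not reproduced in this diff; the following sketch only illustrates the single-pending-reader semantics implied by the call sites and is not code from the package:

    /* illustrative sketch of a promise-based queue with at most one
       pending reader, inferred from how utils.SingleQueue is used above */
    class SingleQueueSketch<T> {
        private items: T[] = []
        private waiter: ((item: T) => void) | null = null

        /* enqueue an item, or hand it directly to a pending read() */
        write (item: T): void {
            if (this.waiter !== null) {
                const resolve = this.waiter
                this.waiter = null
                resolve(item)
            }
            else
                this.items.push(item)
        }

        /* dequeue an item, waiting until one becomes available */
        read (): Promise<T> {
            if (this.items.length > 0)
                return Promise.resolve(this.items.shift()!)
            return new Promise<T>((resolve) => { this.waiter = resolve })
        }
    }

This decoupling is what lets the node bridge Deepgram's push-style events into Node's pull-style Readable side while keeping backpressure (highWaterMark: 1) intact.
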
@@ -23,6 +23,8 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
      /* internal state */
      private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
      private static speexInitialized = false
+     private destroyed = false
+     private resampler: SpeexResampler | null = null

      /* construct node */
      constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -39,6 +41,10 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
              optimize: { type: "string", val: "latency", pos: 5, match: /^(?:latency|quality)$/ }
          })

+         /* sanity check parameters */
+         if (!this.params.key)
+             throw new Error("ElevenLabs API key not configured")
+
          /* declare node input/output format */
          this.input = "text"
          this.output = "audio"
@@ -54,6 +60,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {

      /* open node */
      async open () {
+         /* clear destruction flag */
+         this.destroyed = false
+
          /* establish ElevenLabs API connection */
          this.elevenlabs = new ElevenLabs.ElevenLabsClient({
              apiKey: this.params.key
@@ -120,37 +129,68 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
              await SpeexResampler.initPromise
              SpeechFlowNodeElevenlabs.speexInitialized = true
          }
-         const resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
+         this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)

          /* create transform stream and connect it to the ElevenLabs API */
-         const log = (level: string, msg: string) => { this.log(level, msg) }
+         const self = this
          this.stream = new Stream.Transform({
              writableObjectMode: true,
              readableObjectMode: true,
              decodeStrings: false,
              highWaterMark: 1,
              transform (chunk: SpeechFlowChunk, encoding, callback) {
+                 if (self.destroyed) {
+                     callback(new Error("stream already destroyed"))
+                     return
+                 }
                  if (Buffer.isBuffer(chunk.payload))
                      callback(new Error("invalid chunk payload type"))
                  else {
-                     speechStream(chunk.payload).then((stream) => {
-                         getStreamAsBuffer(stream).then((buffer) => {
-                             const bufferResampled = resampler.processChunk(buffer)
-                             log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
+                     (async () => {
+                         let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
+                             processTimeout = null
+                             callback(new Error("ElevenLabs API timeout"))
+                         }, 60 * 1000)
+                         const clearProcessTimeout = () => {
+                             if (processTimeout !== null) {
+                                 clearTimeout(processTimeout)
+                                 processTimeout = null
+                             }
+                         }
+                         try {
+                             const stream = await speechStream(chunk.payload as string)
+                             if (self.destroyed) {
+                                 clearProcessTimeout()
+                                 callback(new Error("stream destroyed during processing"))
+                                 return
+                             }
+                             const buffer = await getStreamAsBuffer(stream)
+                             if (self.destroyed) {
+                                 clearProcessTimeout()
+                                 callback(new Error("stream destroyed during processing"))
+                                 return
+                             }
+                             const bufferResampled = self.resampler!.processChunk(buffer)
+                             self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
                              const chunkNew = chunk.clone()
                              chunkNew.type = "audio"
                              chunkNew.payload = bufferResampled
+                             clearProcessTimeout()
                              this.push(chunkNew)
                              callback()
-                         }).catch((error) => {
-                             callback(error)
-                         })
-                     }).catch((error) => {
-                         callback(error)
-                     })
+                         }
+                         catch (error) {
+                             clearProcessTimeout()
+                             callback(error instanceof Error ? error : new Error("ElevenLabs processing failed"))
+                         }
+                     })()
                  }
              },
              final (callback) {
+                 if (self.destroyed) {
+                     callback()
+                     return
+                 }
                  this.push(null)
                  callback()
              }
@@ -159,12 +199,19 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {

      /* close node */
      async close () {
+         /* indicate destruction */
+         this.destroyed = true
+
          /* destroy stream */
          if (this.stream !== null) {
              this.stream.destroy()
              this.stream = null
          }

+         /* destroy resampler */
+         if (this.resampler !== null)
+             this.resampler = null
+
          /* destroy ElevenLabs API */
          if (this.elevenlabs !== null)
              this.elevenlabs = null
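
Note on the reworked ElevenLabs transform() above: the nested .then()/.catch() chains were replaced by an async IIFE guarded by a 60-second timeout, with clearProcessTimeout() bookkeeping ensuring callback() fires exactly once whether the API call completes, fails, or stalls. The same exactly-once guarantee could be factored into a standalone helper; the following is an illustrative sketch under that assumption, not an API of this package:

    /* illustrative sketch: race a promise against a timeout while
       guaranteeing that the result settles exactly once */
    function withTimeout<T> (promise: Promise<T>, ms: number, message: string): Promise<T> {
        return new Promise<T>((resolve, reject) => {
            let settled = false
            const timer = setTimeout(() => {
                if (!settled) {
                    settled = true
                    reject(new Error(message))
                }
            }, ms)
            promise.then(
                (value) => {
                    if (!settled) {
                        settled = true
                        clearTimeout(timer)
                        resolve(value)
                    }
                },
                (error) => {
                    if (!settled) {
                        settled = true
                        clearTimeout(timer)
                        reject(error)
                    }
                }
            )
        })
    }

With such a helper, the transform() body would reduce to a single try/catch around calls like await withTimeout(speechStream(text), 60 * 1000, "ElevenLabs API timeout").
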
@@ -21,6 +21,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {

      /* internal state */
      private kokoro: KokoroTTS | null = null
+     private resampler: SpeexResampler | null = null
      private static speexInitialized = false

      /* construct node */
@@ -59,9 +60,11 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
          const interval = setInterval(() => {
              for (const [ artifact, percent ] of progressState) {
                  this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
-                 if (percent >= 1.0)
+                 if (percent >= 100.0)
                      progressState.delete(artifact)
              }
+             if (progressState.size === 0)
+                 clearInterval(interval)
          }, 1000)
          this.kokoro = await KokoroTTS.from_pretrained(model, {
              dtype: "q4f16",
@@ -78,7 +81,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
              await SpeexResampler.initPromise
              SpeechFlowNodeKokoro.speexInitialized = true
          }
-         const resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
+         this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)

          /* determine voice for text-to-speech operation */
          const voices = {
@@ -91,7 +94,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
          if (voice === undefined)
              throw new Error(`invalid Kokoro voice "${this.params.voice}"`)

-         /* perform text-to-speech operation with Elevenlabs API */
+         /* perform text-to-speech operation with Kokoro API */
          const text2speech = async (text: string) => {
              this.log("info", `Kokoro: input: "${text}"`)
              const audio = await this.kokoro!.generate(text, {
@@ -110,7 +113,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
              }

              /* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz */
-             const buffer2 = resampler.processChunk(buffer1)
+             const buffer2 = this.resampler!.processChunk(buffer1)

              return buffer2
          }
@@ -153,6 +156,10 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
              this.stream = null
          }

+         /* destroy resampler */
+         if (this.resampler !== null)
+             this.resampler = null
+
          /* destroy Kokoro API */
          if (this.kokoro !== null)
              this.kokoro = null
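
Note on the Kokoro progress-logger change above: the logged values are on a 0-100 scale (they are printed with toFixed(2) and a "%" suffix), so the old `percent >= 1.0` threshold treated an artifact as complete at one percent, and the interval apparently was never cleared once all artifacts had finished. A condensed sketch of the corrected, self-terminating pattern (standalone variable names are hypothetical):

    /* condensed sketch of the corrected, self-terminating progress logger */
    const progress = new Map<string, number>()
    const interval = setInterval(() => {
        for (const [ artifact, percent ] of progress) {
            console.log(`downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
            if (percent >= 100.0)
                progress.delete(artifact)   /* complete: stop reporting this artifact */
        }
        if (progress.size === 0)
            clearInterval(interval)         /* previously leaked: kept firing forever */
    }, 1000)
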
@@ -27,12 +27,16 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {

          /* declare node configuration parameters */
          this.configure({
-             key: { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY },
+             key: { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY ?? "" },
              src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
              dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
              optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
          })

+         /* validate API key */
+         if (this.params.key === "")
+             throw new Error("DeepL API key is required")
+
          /* sanity check situation */
          if (this.params.src === this.params.dst)
              throw new Error("source and destination languages cannot be the same")
@@ -44,9 +48,10 @@

      /* one-time status of node */
      async status () {
-         this.deepl = new DeepL.Translator(this.params.key)
-         const usage = await this.deepl.getUsage()
-         const percent = (usage?.character?.count ?? 0) / (usage?.character?.limit ?? 0) * 100
+         const deepl = new DeepL.Translator(this.params.key)
+         const usage = await deepl.getUsage()
+         const limit = usage?.character?.limit ?? 1
+         const percent = limit > 0 ? ((usage?.character?.count ?? 0) / limit * 100) : 0
          return { usage: `${percent.toFixed(8)}%` }
      }

@@ -41,7 +41,7 @@ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
              return text
          }

-         /* establish a duplex stream and connect it to DeepL translation */
+         /* establish a duplex stream and connect it to text formatting */
          this.stream = new Stream.Transform({
              readableObjectMode: true,
              writableObjectMode: true,
@@ -74,7 +74,7 @@
          })
      }

-     /* open node */
+     /* close node */
      async close () {
          /* close stream */
          if (this.stream !== null) {
@@ -39,7 +39,7 @@ export default class SpeechFlowNodeOllama extends SpeechFlowNode {
              "Do NOT give any preamble.\n" +
              "Do NOT give any prolog.\n" +
              "Do NOT give any epilog.\n" +
-             "Do NOT change the gammar.\n" +
+             "Do NOT change the grammar.\n" +
              "Do NOT use synonyms for words.\n" +
              "Keep all words.\n" +
              "Fill in missing commas.\n" +
@@ -39,7 +39,7 @@ export default class SpeechFlowNodeOpenAI extends SpeechFlowNode {
              "Do NOT give any preamble.\n" +
              "Do NOT give any prolog.\n" +
              "Do NOT give any epilog.\n" +
-             "Do NOT change the gammar.\n" +
+             "Do NOT change the grammar.\n" +
              "Do NOT use synonyms for words.\n" +
              "Keep all words.\n" +
              "Fill in missing commas.\n" +