speechflow 1.6.7 → 1.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (152):
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +77 -52
  3. package/etc/secretlint.json +7 -0
  4. package/etc/speechflow.yaml +13 -4
  5. package/etc/stx.conf +3 -2
  6. package/package.json +8 -6
  7. package/speechflow-cli/dst/speechflow-main-api.js +9 -8
  8. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-main-graph.js +13 -14
  10. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-main-status.js +38 -8
  12. package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +3 -0
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +4 -2
  16. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +4 -2
  19. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  20. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
  21. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +46 -17
  23. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +0 -5
  25. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +3 -4
  27. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  28. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +0 -5
  29. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +1 -2
  31. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +0 -5
  33. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +0 -5
  35. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +8 -2
  37. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
  39. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +17 -19
  40. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +0 -1
  42. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +30 -25
  43. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  44. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +79 -48
  45. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +6 -11
  47. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  48. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +45 -44
  49. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  50. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +2 -0
  51. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +19 -7
  52. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  53. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -2
  54. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +0 -1
  56. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  57. package/speechflow-cli/dst/speechflow-node-t2t-format.js +0 -1
  58. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  59. package/speechflow-cli/dst/speechflow-node-t2t-google.js +0 -1
  60. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +0 -1
  62. package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
  63. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -1
  64. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  65. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +0 -1
  66. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +173 -29
  68. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
  70. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +10 -1
  71. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  72. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +0 -5
  73. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  74. package/speechflow-cli/dst/speechflow-node-xio-device.js +5 -5
  75. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  76. package/speechflow-cli/dst/speechflow-node-xio-file.js +4 -4
  77. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  78. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +9 -3
  79. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  80. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +16 -5
  81. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-util-audio.js +3 -3
  83. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  84. package/speechflow-cli/dst/speechflow-util-error.d.ts +0 -1
  85. package/speechflow-cli/dst/speechflow-util-error.js +0 -7
  86. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  87. package/speechflow-cli/dst/speechflow-util-misc.d.ts +2 -0
  88. package/speechflow-cli/dst/speechflow-util-misc.js +26 -0
  89. package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -0
  90. package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -2
  91. package/speechflow-cli/dst/speechflow-util-queue.js +36 -15
  92. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  93. package/speechflow-cli/dst/speechflow-util-stream.d.ts +2 -2
  94. package/speechflow-cli/dst/speechflow-util-stream.js +17 -19
  95. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  97. package/speechflow-cli/dst/speechflow-util.js +1 -0
  98. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  99. package/speechflow-cli/etc/oxlint.jsonc +6 -1
  100. package/speechflow-cli/etc/stx.conf +1 -0
  101. package/speechflow-cli/package.json +28 -27
  102. package/speechflow-cli/src/speechflow-main-api.ts +9 -11
  103. package/speechflow-cli/src/speechflow-main-graph.ts +15 -16
  104. package/speechflow-cli/src/speechflow-main-status.ts +6 -10
  105. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -0
  106. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +4 -2
  107. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -1
  108. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +4 -2
  109. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -2
  110. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +57 -20
  111. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +0 -5
  112. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -4
  113. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +0 -5
  114. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +1 -2
  115. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +0 -5
  116. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +0 -5
  117. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +9 -3
  118. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +27 -27
  119. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +37 -28
  120. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +92 -56
  121. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +7 -11
  122. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +47 -43
  123. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +22 -7
  124. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -2
  125. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +0 -1
  126. package/speechflow-cli/src/speechflow-node-t2t-format.ts +0 -1
  127. package/speechflow-cli/src/speechflow-node-t2t-google.ts +0 -1
  128. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +0 -1
  129. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -1
  130. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -1
  131. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +205 -33
  132. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +16 -4
  133. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +3 -8
  134. package/speechflow-cli/src/speechflow-node-xio-device.ts +6 -9
  135. package/speechflow-cli/src/speechflow-node-xio-file.ts +4 -4
  136. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +10 -4
  137. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +16 -5
  138. package/speechflow-cli/src/speechflow-util-audio-wt.ts +4 -4
  139. package/speechflow-cli/src/speechflow-util-audio.ts +7 -7
  140. package/speechflow-cli/src/speechflow-util-error.ts +0 -7
  141. package/speechflow-cli/src/speechflow-util-misc.ts +23 -0
  142. package/speechflow-cli/src/speechflow-util-queue.ts +40 -20
  143. package/speechflow-cli/src/speechflow-util-stream.ts +29 -24
  144. package/speechflow-cli/src/speechflow-util.ts +1 -0
  145. package/speechflow-ui-db/dst/index.css +1 -5
  146. package/speechflow-ui-db/dst/index.js +14 -58
  147. package/speechflow-ui-db/etc/stx.conf +5 -16
  148. package/speechflow-ui-db/package.json +16 -15
  149. package/speechflow-ui-st/dst/index.css +1 -5
  150. package/speechflow-ui-st/dst/index.js +31 -160
  151. package/speechflow-ui-st/etc/stx.conf +5 -16
  152. package/speechflow-ui-st/package.json +17 -16
@@ -9,7 +9,7 @@ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import OpenAI from "openai"
12
- import { DateTime } from "luxon"
12
+ import { DateTime, Duration } from "luxon"
13
13
  import SpeexResampler from "speex-resampler"
14
14
  import ws from "ws"
15
15
 
@@ -23,11 +23,11 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
23
23
  public static name = "a2t-openai"
24
24
 
25
25
  /* internal state */
26
- private openai: OpenAI | null = null
27
- private ws: ws.WebSocket | null = null
28
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
- private resampler: SpeexResampler | null = null
30
- private closing = false
26
+ private openai: OpenAI | null = null
27
+ private ws: ws.WebSocket | null = null
28
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
+ private resampler: SpeexResampler | null = null
30
+ private closing = false
31
31
  private connectionTimeout: ReturnType<typeof setTimeout> | null = null
32
32
 
33
33
  /* construct node */
@@ -43,6 +43,10 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
43
43
  interim: { type: "boolean", val: false }
44
44
  })
45
45
 
46
+ /* sanity check parameters */
47
+ if (!this.params.key)
48
+ throw new Error("OpenAI API key not configured")
49
+
46
50
  /* declare node input/output format */
47
51
  this.input = "audio"
48
52
  this.output = "text"
@@ -141,11 +145,25 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
141
145
  })
142
146
  this.ws.on("close", () => {
143
147
  this.log("info", "WebSocket connection closed")
144
- this.queue!.write(null)
148
+ if (!this.closing && this.queue !== null)
149
+ this.queue.write(null)
145
150
  })
146
151
  this.ws.on("error", (err) => {
147
152
  this.log("error", `WebSocket connection error: ${err}`)
148
153
  })
154
+
155
+ /* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
156
+ const speechTiming = new Map<string, { startMs: number, endMs: number }>()
157
+
158
+ /* helper function for aggregating meta information */
159
+ const aggregateMeta = (start: Duration, end: Duration): Map<string, any> => {
160
+ const metas = metastore.fetch(start, end)
161
+ return metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
162
+ curr.forEach((val, key) => { prev.set(key, val) })
163
+ return prev
164
+ }, new Map<string, any>())
165
+ }
166
+
149
167
  let text = ""
150
168
  this.ws.on("message", (data) => {
151
169
  let ev: any
@@ -163,53 +181,63 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
163
181
  switch (ev.type) {
164
182
  case "transcription_session.created":
165
183
  break
166
- case "conversation.item.created":
184
+ case "conversation.item.created": {
167
185
  text = ""
168
186
  break
187
+ }
169
188
  case "conversation.item.input_audio_transcription.delta": {
170
189
  text += ev.delta as string
171
- if (this.params.interim) {
172
- const start = DateTime.now().diff(this.timeOpen!) // FIXME: OpenAI does not provide timestamps
173
- const end = start // FIXME: OpenAI does not provide timestamps
174
- const metas = metastore.fetch(start, end)
175
- const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
176
- curr.forEach((val, key) => { prev.set(key, val) })
177
- return prev
178
- }, new Map<string, any>())
179
- const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
180
- chunk.meta = meta
181
- this.queue!.write(chunk)
190
+ if (this.params.interim && !this.closing && this.queue !== null) {
191
+ const itemId = ev.item_id as string
192
+ const timing = speechTiming.get(itemId)
193
+ const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
194
+ const end = timing ? Duration.fromMillis(timing.endMs) : start
195
+ const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
196
+ chunk.meta = aggregateMeta(start, end)
197
+ this.queue.write(chunk)
182
198
  }
183
199
  break
184
200
  }
185
201
  case "conversation.item.input_audio_transcription.completed": {
186
- text = ev.transcript as string
187
- const start = DateTime.now().diff(this.timeOpen!) // FIXME: OpenAI does not provide timestamps
188
- const end = start // FIXME: OpenAI does not provide timestamps
189
- const metas = metastore.fetch(start, end)
190
- const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
191
- curr.forEach((val, key) => { prev.set(key, val) })
192
- return prev
193
- }, new Map<string, any>())
194
- metastore.prune(start)
195
- const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
196
- chunk.meta = meta
197
- this.queue!.write(chunk)
198
- text = ""
202
+ if (!this.closing && this.queue !== null) {
203
+ text = ev.transcript as string
204
+ const itemId = ev.item_id as string
205
+ const timing = speechTiming.get(itemId)
206
+ const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
207
+ const end = timing ? Duration.fromMillis(timing.endMs) : start
208
+ const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
209
+ chunk.meta = aggregateMeta(start, end)
210
+ metastore.prune(start)
211
+ speechTiming.delete(itemId)
212
+ this.queue.write(chunk)
213
+ text = ""
214
+ }
199
215
  break
200
216
  }
201
- case "input_audio_buffer.speech_started":
217
+ case "input_audio_buffer.speech_started": {
202
218
  this.log("info", "VAD: speech started")
219
+ const itemId = ev.item_id as string
220
+ const audioStartMs = ev.audio_start_ms as number
221
+ speechTiming.set(itemId, { startMs: audioStartMs, endMs: audioStartMs })
203
222
  break
204
- case "input_audio_buffer.speech_stopped":
223
+ }
224
+ case "input_audio_buffer.speech_stopped": {
205
225
  this.log("info", "VAD: speech stopped")
226
+ const itemId = ev.item_id as string
227
+ const audioEndMs = ev.audio_end_ms as number
228
+ const timing = speechTiming.get(itemId)
229
+ if (timing)
230
+ timing.endMs = audioEndMs
206
231
  break
207
- case "input_audio_buffer.committed":
232
+ }
233
+ case "input_audio_buffer.committed": {
208
234
  this.log("info", "input buffer committed")
209
235
  break
210
- case "error":
236
+ }
237
+ case "error": {
211
238
  this.log("error", `error: ${ev.error?.message}`)
212
239
  break
240
+ }
213
241
  default:
214
242
  break
215
243
  }
@@ -220,6 +248,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
220
248
 
221
249
  /* provide Duplex stream and internally attach to OpenAI API */
222
250
  const self = this
251
+ const reads = new util.PromiseSet<void>()
223
252
  this.stream = new Stream.Duplex({
224
253
  writableObjectMode: true,
225
254
  readableObjectMode: true,
@@ -255,12 +284,32 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
255
284
  callback()
256
285
  }
257
286
  },
287
+ async final (callback) {
288
+ if (self.closing || self.ws === null) {
289
+ callback()
290
+ return
291
+ }
292
+ try {
293
+ sendMessage({ type: "input_audio_buffer.commit" })
294
+ self.ws.close()
295
+ await util.sleep(50)
296
+ }
297
+ catch (error) {
298
+ self.log("warning", `error closing OpenAI connection: ${error}`)
299
+ }
300
+ await reads.awaitAll()
301
+ const chunks: Array<SpeechFlowChunk | null> = self.queue?.drain() ?? []
302
+ for (const chunk of chunks)
303
+ this.push(chunk)
304
+ this.push(null)
305
+ callback()
306
+ },
258
307
  read (size) {
259
308
  if (self.closing || self.queue === null) {
260
309
  this.push(null)
261
310
  return
262
311
  }
263
- self.queue.read().then((chunk) => {
312
+ reads.add(self.queue.read().then((chunk) => {
264
313
  if (self.closing || self.queue === null) {
265
314
  this.push(null)
266
315
  return
@@ -276,23 +325,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
276
325
  }).catch((error: unknown) => {
277
326
  if (!self.closing && self.queue !== null)
278
327
  self.log("error", `queue read error: ${util.ensureError(error).message}`)
279
- })
280
- },
281
- final (callback) {
282
- if (self.closing || self.ws === null) {
283
- callback()
284
- return
285
- }
286
- try {
287
- sendMessage({ type: "input_audio_buffer.commit" })
288
- self.ws.close()
289
- /* NOTICE: do not push null here -- let the OpenAI close event handle it */
290
- callback()
291
- }
292
- catch (error) {
293
- self.log("warning", `error closing OpenAI connection: ${error}`)
294
- callback(util.ensureError(error, "failed to close OpenAI connection"))
295
- }
328
+ }))
296
329
  }
297
330
  })
298
331
  }
@@ -316,11 +349,14 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
316
349
 
317
350
  /* close OpenAI connection */
318
351
  if (this.ws !== null) {
352
+ this.ws.removeAllListeners()
319
353
  this.ws.close()
320
354
  this.ws = null
321
355
  }
322
- if (this.openai !== null)
323
- this.openai = null
356
+ this.openai = null
357
+
358
+ /* close resampler */
359
+ this.resampler = null
324
360
 
325
361
  /* shutdown stream */
326
362
  if (this.stream !== null) {
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
83
83
  "Ruth": { language: "en", languageCode: "en-US", engine: "generative" },
84
84
  "Stephen": { language: "en", languageCode: "en-US", engine: "generative" },
85
85
  "Vicki": { language: "de", languageCode: "de-DE", engine: "generative" },
86
- "Daniel": { language: "de", languageCode: "de-DE", engine: "generative" },
86
+ "Daniel": { language: "de", languageCode: "de-DE", engine: "generative" }
87
87
  }
88
88
  const voiceConfig = voices[this.params.voice as keyof typeof voices]
89
89
  if (voiceConfig === undefined)
@@ -147,11 +147,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
147
147
  callback()
148
148
  },
149
149
  final (callback) {
150
- if (self.closing) {
151
- callback()
152
- return
153
- }
154
- this.push(null)
155
150
  callback()
156
151
  }
157
152
  })
@@ -162,6 +157,12 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
162
157
  /* indicate closing */
163
158
  this.closing = true
164
159
 
160
+ /* shutdown stream */
161
+ if (this.stream !== null) {
162
+ await util.destroyStream(this.stream)
163
+ this.stream = null
164
+ }
165
+
165
166
  /* destroy resampler */
166
167
  if (this.resampler !== null)
167
168
  this.resampler = null
@@ -171,11 +172,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
171
172
  this.client.destroy()
172
173
  this.client = null
173
174
  }
174
- /* shutdown stream */
175
- if (this.stream !== null) {
176
- await util.destroyStream(this.stream)
177
- this.stream = null
178
- }
179
175
  }
180
176
  }
181
177
 
@@ -10,6 +10,7 @@ import Stream from "node:stream"
10
10
  /* external dependencies */
11
11
  import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
12
12
  import { getStreamAsBuffer } from "get-stream"
13
+ import { Duration } from "luxon"
13
14
  import SpeexResampler from "speex-resampler"
14
15
 
15
16
  /* internal dependencies */
@@ -102,14 +103,15 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
102
103
  throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
103
104
  }
104
105
  const labels = voice.labels ?? {}
105
- const info = Object.keys(labels).length > 0 ?
106
- ", " + Object.entries(labels).map(([ key, val ]) => `${key}: "${val}"`).join(", ") : ""
106
+ const info = Object.keys(labels).length > 0
107
+ ? ", " + Object.entries(labels).map(([ key, val ]) => `${key}: "${val}"`).join(", ")
108
+ : ""
107
109
  this.log("info", `selected voice: name: "${voice.name}"${info}`)
108
110
 
109
111
  /* perform text-to-speech operation with Elevenlabs API */
110
- const model = this.params.optimize === "quality" ?
111
- "eleven_turbo_v2_5" :
112
- "eleven_flash_v2_5"
112
+ const model = this.params.optimize === "quality"
113
+ ? "eleven_turbo_v2_5"
114
+ : "eleven_flash_v2_5"
113
115
  const speechStream = (text: string) => {
114
116
  this.log("info", `ElevenLabs: send text "${text}"`)
115
117
  return this.elevenlabs!.textToSpeech.convert(voice.voiceId, {
@@ -140,58 +142,60 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
140
142
  readableObjectMode: true,
141
143
  decodeStrings: false,
142
144
  highWaterMark: 1,
143
- transform (chunk: SpeechFlowChunk, encoding, callback) {
145
+ async transform (chunk: SpeechFlowChunk, encoding, callback) {
144
146
  if (self.closing)
145
147
  callback(new Error("stream already destroyed"))
146
148
  else if (Buffer.isBuffer(chunk.payload))
147
149
  callback(new Error("invalid chunk payload type"))
148
150
  else {
149
- (async () => {
150
- let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
151
+ let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
152
+ processTimeout = null
153
+ callback(new Error("ElevenLabs API timeout"))
154
+ }, 60 * 1000)
155
+ const clearProcessTimeout = () => {
156
+ if (processTimeout !== null) {
157
+ clearTimeout(processTimeout)
151
158
  processTimeout = null
152
- callback(new Error("ElevenLabs API timeout"))
153
- }, 60 * 1000)
154
- const clearProcessTimeout = () => {
155
- if (processTimeout !== null) {
156
- clearTimeout(processTimeout)
157
- processTimeout = null
158
- }
159
159
  }
160
- try {
161
- if (self.closing) {
162
- clearProcessTimeout()
163
- callback(new Error("stream destroyed during processing"))
164
- return
165
- }
166
- const stream = await speechStream(chunk.payload as string)
167
- const buffer = await getStreamAsBuffer(stream)
168
- if (self.closing) {
169
- clearProcessTimeout()
170
- callback(new Error("stream destroyed during processing"))
171
- return
172
- }
173
- const bufferResampled = self.resampler!.processChunk(buffer)
174
- self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
175
- const chunkNew = chunk.clone()
176
- chunkNew.type = "audio"
177
- chunkNew.payload = bufferResampled
160
+ }
161
+ try {
162
+ if (self.closing) {
178
163
  clearProcessTimeout()
179
- this.push(chunkNew)
180
- callback()
164
+ callback(new Error("stream destroyed during processing"))
165
+ return
181
166
  }
182
- catch (error) {
167
+ const stream = await speechStream(chunk.payload as string)
168
+ const buffer = await getStreamAsBuffer(stream)
169
+ if (self.closing) {
183
170
  clearProcessTimeout()
184
- callback(util.ensureError(error, "ElevenLabs processing failed"))
171
+ callback(new Error("stream destroyed during processing"))
172
+ return
185
173
  }
186
- })()
174
+ self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
175
+ const bufferResampled = self.resampler!.processChunk(buffer)
176
+ self.log("info", "ElevenLabs: forwarding resampled audio " +
177
+ `(buffer length: ${bufferResampled.byteLength})`)
178
+
179
+ /* calculate actual audio duration from PCM buffer size */
180
+ const durationMs = util.audioBufferDuration(bufferResampled,
181
+ self.config.audioSampleRate, self.config.audioBitDepth) * 1000
182
+
183
+ /* create new chunk with recalculated timestamps */
184
+ const chunkNew = chunk.clone()
185
+ chunkNew.type = "audio"
186
+ chunkNew.payload = bufferResampled
187
+ chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
188
+ clearProcessTimeout()
189
+ this.push(chunkNew)
190
+ callback()
191
+ }
192
+ catch (error) {
193
+ clearProcessTimeout()
194
+ callback(util.ensureError(error, "ElevenLabs processing failed"))
195
+ }
187
196
  }
188
197
  },
189
198
  final (callback) {
190
- if (self.closing) {
191
- callback()
192
- return
193
- }
194
- this.push(null)
195
199
  callback()
196
200
  }
197
201
  })
@@ -22,6 +22,7 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
22
22
 
23
23
  /* internal state */
24
24
  private kokoro: KokoroTTS | null = null
25
+ private closing = false
25
26
  private resampler: SpeexResampler | null = null
26
27
 
27
28
  /* construct node */
@@ -32,7 +33,7 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
32
33
  this.configure({
33
34
  voice: { type: "string", val: "Aoede", pos: 0, match: /^(?:Aoede|Heart|Puck|Fenrir)$/ },
34
35
  language: { type: "string", val: "en", pos: 1, match: /^(?:en)$/ },
35
- speed: { type: "number", val: 1.25, pos: 2, match: (n: number) => n >= 1.0 && n <= 1.30 },
36
+ speed: { type: "number", val: 1.25, pos: 2, match: (n: number) => n >= 1.0 && n <= 1.30 }
36
37
  })
37
38
 
38
39
  /* declare node input/output format */
@@ -40,8 +41,16 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
40
41
  this.output = "audio"
41
42
  }
42
43
 
44
+ /* one-time status of node */
45
+ async status () {
46
+ return {}
47
+ }
48
+
43
49
  /* open node */
44
50
  async open () {
51
+ /* clear destruction flag */
52
+ this.closing = false
53
+
45
54
  /* establish Kokoro */
46
55
  const model = "onnx-community/Kokoro-82M-v1.0-ONNX"
47
56
  const progressState = new Map<string, number>()
@@ -126,15 +135,19 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
126
135
  decodeStrings: false,
127
136
  highWaterMark: 1,
128
137
  transform (chunk: SpeechFlowChunk, encoding, callback) {
129
- if (Buffer.isBuffer(chunk.payload))
138
+ if (self.closing)
139
+ callback(new Error("stream already destroyed"))
140
+ else if (Buffer.isBuffer(chunk.payload))
130
141
  callback(new Error("invalid chunk payload type"))
131
142
  else {
132
143
  text2speech(chunk.payload).then((buffer) => {
144
+ if (self.closing)
145
+ throw new Error("stream destroyed during processing")
133
146
  self.log("info", `Kokoro: received audio (buffer length: ${buffer.byteLength})`)
134
- chunk = chunk.clone()
135
- chunk.type = "audio"
136
- chunk.payload = buffer
137
- this.push(chunk)
147
+ const chunkNew = chunk.clone()
148
+ chunkNew.type = "audio"
149
+ chunkNew.payload = buffer
150
+ this.push(chunkNew)
138
151
  callback()
139
152
  }).catch((error: unknown) => {
140
153
  callback(util.ensureError(error))
@@ -142,7 +155,6 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
142
155
  }
143
156
  },
144
157
  final (callback) {
145
- this.push(null)
146
158
  callback()
147
159
  }
148
160
  })
@@ -150,6 +162,9 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
150
162
 
151
163
  /* close node */
152
164
  async close () {
165
+ /* indicate closing */
166
+ this.closing = true
167
+
153
168
  /* shutdown stream */
154
169
  if (this.stream !== null) {
155
170
  await util.destroyStream(this.stream)
@@ -98,7 +98,7 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
98
98
  if (!retriable || attempt >= maxRetries)
99
99
  break
100
100
  const delayMs = Math.min(1000 * Math.pow(2, attempt - 1), 5000)
101
- await new Promise((resolve) => setTimeout(resolve, delayMs))
101
+ await util.sleep(delayMs)
102
102
  }
103
103
  }
104
104
  throw util.ensureError(lastError)
@@ -129,7 +129,6 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
129
129
  }
130
130
  },
131
131
  final (callback) {
132
- this.push(null)
133
132
  callback()
134
133
  }
135
134
  })
@@ -100,7 +100,6 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
100
100
  }
101
101
  },
102
102
  final (callback) {
103
- this.push(null)
104
103
  callback()
105
104
  }
106
105
  })
@@ -64,7 +64,6 @@ export default class SpeechFlowNodeT2TFormat extends SpeechFlowNode {
64
64
  }
65
65
  },
66
66
  final (callback) {
67
- this.push(null)
68
67
  callback()
69
68
  }
70
69
  })
@@ -110,7 +110,6 @@ export default class SpeechFlowNodeT2TGoogle extends SpeechFlowNode {
110
110
  }
111
111
  },
112
112
  final (callback) {
113
- this.push(null)
114
113
  callback()
115
114
  }
116
115
  })
@@ -67,7 +67,6 @@ export default class SpeechFlowNodeT2TModify extends SpeechFlowNode {
67
67
  }
68
68
  },
69
69
  final (callback) {
70
- this.push(null)
71
70
  callback()
72
71
  }
73
72
  })
@@ -258,7 +258,6 @@ export default class SpeechFlowNodeT2TOllama extends SpeechFlowNode {
258
258
  }
259
259
  },
260
260
  final (callback) {
261
- this.push(null)
262
261
  callback()
263
262
  }
264
263
  })
@@ -226,7 +226,6 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
226
226
  }
227
227
  },
228
228
  final (callback) {
229
- this.push(null)
230
229
  callback()
231
230
  }
232
231
  })