speechflow 1.6.6 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/CHANGELOG.md +17 -0
  2. package/README.md +28 -14
  3. package/etc/secretlint.json +7 -0
  4. package/etc/speechflow.yaml +13 -4
  5. package/etc/stx.conf +3 -2
  6. package/package.json +9 -7
  7. package/speechflow-cli/dst/speechflow-main-api.js +3 -3
  8. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
  10. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-main-graph.js +10 -9
  12. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-main-status.js +35 -1
  14. package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
  16. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +46 -17
  18. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  19. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +0 -5
  20. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -2
  22. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +0 -5
  24. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  25. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +0 -5
  26. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +0 -5
  28. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +0 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  31. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +16 -13
  32. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  33. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +21 -16
  34. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  35. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +75 -46
  36. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  37. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +39 -39
  38. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  39. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +0 -1
  40. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -2
  42. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +0 -1
  44. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  45. package/speechflow-cli/dst/speechflow-node-t2t-format.js +0 -1
  46. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  47. package/speechflow-cli/dst/speechflow-node-t2t-google.js +0 -1
  48. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +0 -1
  50. package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
  51. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -1
  52. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  53. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +0 -1
  54. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +173 -29
  56. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  57. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +0 -1
  58. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  59. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +0 -5
  60. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-xio-device.js +2 -2
  62. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  63. package/speechflow-cli/dst/speechflow-node-xio-file.js +3 -3
  64. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  65. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +7 -2
  66. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +14 -4
  68. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node.js +1 -1
  70. package/speechflow-cli/dst/speechflow-node.js.map +1 -1
  71. package/speechflow-cli/dst/speechflow-util-audio.js +3 -3
  72. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  73. package/speechflow-cli/dst/speechflow-util-misc.d.ts +2 -0
  74. package/speechflow-cli/dst/speechflow-util-misc.js +26 -0
  75. package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -0
  76. package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -2
  77. package/speechflow-cli/dst/speechflow-util-queue.js +33 -12
  78. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  79. package/speechflow-cli/dst/speechflow-util-stream.d.ts +2 -2
  80. package/speechflow-cli/dst/speechflow-util-stream.js +13 -17
  81. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  83. package/speechflow-cli/dst/speechflow-util.js +1 -0
  84. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  85. package/speechflow-cli/etc/oxlint.jsonc +7 -1
  86. package/speechflow-cli/etc/stx.conf +1 -0
  87. package/speechflow-cli/package.json +31 -30
  88. package/speechflow-cli/src/speechflow-main-api.ts +3 -6
  89. package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
  90. package/speechflow-cli/src/speechflow-main-graph.ts +10 -11
  91. package/speechflow-cli/src/speechflow-main-status.ts +2 -2
  92. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -2
  93. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +57 -20
  94. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +0 -5
  95. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +1 -2
  96. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +0 -5
  97. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +0 -5
  98. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +0 -5
  99. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +0 -1
  100. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +21 -16
  101. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +24 -16
  102. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +86 -54
  103. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +41 -38
  104. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +0 -1
  105. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -2
  106. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +0 -1
  107. package/speechflow-cli/src/speechflow-node-t2t-format.ts +0 -1
  108. package/speechflow-cli/src/speechflow-node-t2t-google.ts +0 -1
  109. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +0 -1
  110. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -1
  111. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -1
  112. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +205 -33
  113. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +0 -1
  114. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +0 -5
  115. package/speechflow-cli/src/speechflow-node-xio-device.ts +2 -2
  116. package/speechflow-cli/src/speechflow-node-xio-file.ts +3 -3
  117. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +7 -2
  118. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +14 -4
  119. package/speechflow-cli/src/speechflow-node.ts +1 -1
  120. package/speechflow-cli/src/speechflow-util-audio.ts +3 -3
  121. package/speechflow-cli/src/speechflow-util-misc.ts +23 -0
  122. package/speechflow-cli/src/speechflow-util-queue.ts +36 -16
  123. package/speechflow-cli/src/speechflow-util-stream.ts +24 -21
  124. package/speechflow-cli/src/speechflow-util.ts +1 -0
  125. package/speechflow-ui-db/dst/app-font-fa-brands-400.woff2 +0 -0
  126. package/speechflow-ui-db/dst/app-font-fa-regular-400.woff2 +0 -0
  127. package/speechflow-ui-db/dst/app-font-fa-solid-900.woff2 +0 -0
  128. package/speechflow-ui-db/dst/app-font-fa-v4compatibility.woff2 +0 -0
  129. package/speechflow-ui-db/dst/index.css +1 -5
  130. package/speechflow-ui-db/dst/index.js +17 -58
  131. package/speechflow-ui-db/etc/stx.conf +5 -16
  132. package/speechflow-ui-db/package.json +20 -19
  133. package/speechflow-ui-st/dst/app-font-fa-brands-400.woff2 +0 -0
  134. package/speechflow-ui-st/dst/app-font-fa-regular-400.woff2 +0 -0
  135. package/speechflow-ui-st/dst/app-font-fa-solid-900.woff2 +0 -0
  136. package/speechflow-ui-st/dst/app-font-fa-v4compatibility.woff2 +0 -0
  137. package/speechflow-ui-st/dst/index.css +1 -5
  138. package/speechflow-ui-st/dst/index.js +36 -79
  139. package/speechflow-ui-st/etc/stx.conf +5 -16
  140. package/speechflow-ui-st/package.json +21 -20
  141. package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.d.ts +0 -1
  142. package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js +0 -149
  143. package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js.map +0 -1
  144. package/speechflow-cli/dst/speechflow-node-a2a-pitch2.d.ts +0 -13
  145. package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js +0 -202
  146. package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js.map +0 -1
@@ -187,6 +187,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
187
187
 
188
188
  /* provide Duplex stream and internally attach to Deepgram API */
189
189
  const self = this
190
+ const reads = new util.PromiseSet<void>()
190
191
  this.stream = new Stream.Duplex({
191
192
  writableObjectMode: true,
192
193
  readableObjectMode: true,
@@ -217,12 +218,33 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
217
218
  callback()
218
219
  }
219
220
  },
221
+ async final (callback) {
222
+ /* short-circuiting in case of own closing */
223
+ if (self.closing || self.dg === null) {
224
+ callback()
225
+ return
226
+ }
227
+
228
+ /* close Deepgram API */
229
+ try {
230
+ self.dg.requestClose()
231
+ }
232
+ catch (error) {
233
+ self.log("warning", `error closing Deepgram connection: ${error}`)
234
+ }
235
+
236
+ /* await all read operations */
237
+ await reads.awaitAll()
238
+
239
+ /* NOTICE: do not push null here -- let the Deepgram close event handle it */
240
+ callback()
241
+ },
220
242
  read (size) {
221
243
  if (self.closing || self.queue === null) {
222
244
  this.push(null)
223
245
  return
224
246
  }
225
- self.queue.read().then((chunk) => {
247
+ reads.add(self.queue.read().then((chunk) => {
226
248
  if (self.closing || self.queue === null) {
227
249
  this.push(null)
228
250
  return
@@ -238,21 +260,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
238
260
  }).catch((error: unknown) => {
239
261
  if (!self.closing && self.queue !== null)
240
262
  self.log("error", `queue read error: ${util.ensureError(error).message}`)
241
- })
242
- },
243
- final (callback) {
244
- if (self.closing || self.dg === null) {
245
- callback()
246
- return
247
- }
248
- try {
249
- self.dg.requestClose()
250
- }
251
- catch (error) {
252
- self.log("warning", `error closing Deepgram connection: ${error}`)
253
- }
254
- /* NOTICE: do not push null here -- let the Deepgram close event handle it */
255
- callback()
263
+ }))
256
264
  }
257
265
  })
258
266
  }
@@ -9,7 +9,7 @@ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import OpenAI from "openai"
12
- import { DateTime } from "luxon"
12
+ import { DateTime, Duration } from "luxon"
13
13
  import SpeexResampler from "speex-resampler"
14
14
  import ws from "ws"
15
15
 
@@ -23,11 +23,11 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
23
23
  public static name = "a2t-openai"
24
24
 
25
25
  /* internal state */
26
- private openai: OpenAI | null = null
27
- private ws: ws.WebSocket | null = null
28
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
- private resampler: SpeexResampler | null = null
30
- private closing = false
26
+ private openai: OpenAI | null = null
27
+ private ws: ws.WebSocket | null = null
28
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
+ private resampler: SpeexResampler | null = null
30
+ private closing = false
31
31
  private connectionTimeout: ReturnType<typeof setTimeout> | null = null
32
32
 
33
33
  /* construct node */
@@ -141,11 +141,25 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
141
141
  })
142
142
  this.ws.on("close", () => {
143
143
  this.log("info", "WebSocket connection closed")
144
- this.queue!.write(null)
144
+ if (!this.closing && this.queue !== null)
145
+ this.queue.write(null)
145
146
  })
146
147
  this.ws.on("error", (err) => {
147
148
  this.log("error", `WebSocket connection error: ${err}`)
148
149
  })
150
+
151
+ /* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
152
+ const speechTiming = new Map<string, { startMs: number, endMs: number }>()
153
+
154
+ /* helper function for aggregating meta information */
155
+ const aggregateMeta = (start: Duration, end: Duration): Map<string, any> => {
156
+ const metas = metastore.fetch(start, end)
157
+ return metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
158
+ curr.forEach((val, key) => { prev.set(key, val) })
159
+ return prev
160
+ }, new Map<string, any>())
161
+ }
162
+
149
163
  let text = ""
150
164
  this.ws.on("message", (data) => {
151
165
  let ev: any
@@ -163,53 +177,63 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
163
177
  switch (ev.type) {
164
178
  case "transcription_session.created":
165
179
  break
166
- case "conversation.item.created":
180
+ case "conversation.item.created": {
167
181
  text = ""
168
182
  break
183
+ }
169
184
  case "conversation.item.input_audio_transcription.delta": {
170
185
  text += ev.delta as string
171
- if (this.params.interim) {
172
- const start = DateTime.now().diff(this.timeOpen!) // FIXME: OpenAI does not provide timestamps
173
- const end = start // FIXME: OpenAI does not provide timestamps
174
- const metas = metastore.fetch(start, end)
175
- const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
176
- curr.forEach((val, key) => { prev.set(key, val) })
177
- return prev
178
- }, new Map<string, any>())
179
- const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
180
- chunk.meta = meta
181
- this.queue!.write(chunk)
186
+ if (this.params.interim && !this.closing && this.queue !== null) {
187
+ const itemId = ev.item_id as string
188
+ const timing = speechTiming.get(itemId)
189
+ const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
190
+ const end = timing ? Duration.fromMillis(timing.endMs) : start
191
+ const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
192
+ chunk.meta = aggregateMeta(start, end)
193
+ this.queue.write(chunk)
182
194
  }
183
195
  break
184
196
  }
185
197
  case "conversation.item.input_audio_transcription.completed": {
186
- text = ev.transcript as string
187
- const start = DateTime.now().diff(this.timeOpen!) // FIXME: OpenAI does not provide timestamps
188
- const end = start // FIXME: OpenAI does not provide timestamps
189
- const metas = metastore.fetch(start, end)
190
- const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
191
- curr.forEach((val, key) => { prev.set(key, val) })
192
- return prev
193
- }, new Map<string, any>())
194
- metastore.prune(start)
195
- const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
196
- chunk.meta = meta
197
- this.queue!.write(chunk)
198
- text = ""
198
+ if (!this.closing && this.queue !== null) {
199
+ text = ev.transcript as string
200
+ const itemId = ev.item_id as string
201
+ const timing = speechTiming.get(itemId)
202
+ const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
203
+ const end = timing ? Duration.fromMillis(timing.endMs) : start
204
+ const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
205
+ chunk.meta = aggregateMeta(start, end)
206
+ metastore.prune(start)
207
+ speechTiming.delete(itemId)
208
+ this.queue.write(chunk)
209
+ text = ""
210
+ }
199
211
  break
200
212
  }
201
- case "input_audio_buffer.speech_started":
213
+ case "input_audio_buffer.speech_started": {
202
214
  this.log("info", "VAD: speech started")
215
+ const itemId = ev.item_id as string
216
+ const audioStartMs = ev.audio_start_ms as number
217
+ speechTiming.set(itemId, { startMs: audioStartMs, endMs: audioStartMs })
203
218
  break
204
- case "input_audio_buffer.speech_stopped":
219
+ }
220
+ case "input_audio_buffer.speech_stopped": {
205
221
  this.log("info", "VAD: speech stopped")
222
+ const itemId = ev.item_id as string
223
+ const audioEndMs = ev.audio_end_ms as number
224
+ const timing = speechTiming.get(itemId)
225
+ if (timing)
226
+ timing.endMs = audioEndMs
206
227
  break
207
- case "input_audio_buffer.committed":
228
+ }
229
+ case "input_audio_buffer.committed": {
208
230
  this.log("info", "input buffer committed")
209
231
  break
210
- case "error":
232
+ }
233
+ case "error": {
211
234
  this.log("error", `error: ${ev.error?.message}`)
212
235
  break
236
+ }
213
237
  default:
214
238
  break
215
239
  }
@@ -220,6 +244,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
220
244
 
221
245
  /* provide Duplex stream and internally attach to OpenAI API */
222
246
  const self = this
247
+ const reads = new util.PromiseSet<void>()
223
248
  this.stream = new Stream.Duplex({
224
249
  writableObjectMode: true,
225
250
  readableObjectMode: true,
@@ -255,12 +280,32 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
255
280
  callback()
256
281
  }
257
282
  },
283
+ async final (callback) {
284
+ if (self.closing || self.ws === null) {
285
+ callback()
286
+ return
287
+ }
288
+ try {
289
+ sendMessage({ type: "input_audio_buffer.commit" })
290
+ self.ws.close()
291
+ await util.sleep(50)
292
+ }
293
+ catch (error) {
294
+ self.log("warning", `error closing OpenAI connection: ${error}`)
295
+ }
296
+ await reads.awaitAll()
297
+ const chunks: Array<SpeechFlowChunk | null> = self.queue?.drain() ?? []
298
+ for (const chunk of chunks)
299
+ this.push(chunk)
300
+ this.push(null)
301
+ callback()
302
+ },
258
303
  read (size) {
259
304
  if (self.closing || self.queue === null) {
260
305
  this.push(null)
261
306
  return
262
307
  }
263
- self.queue.read().then((chunk) => {
308
+ reads.add(self.queue.read().then((chunk) => {
264
309
  if (self.closing || self.queue === null) {
265
310
  this.push(null)
266
311
  return
@@ -276,23 +321,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
276
321
  }).catch((error: unknown) => {
277
322
  if (!self.closing && self.queue !== null)
278
323
  self.log("error", `queue read error: ${util.ensureError(error).message}`)
279
- })
280
- },
281
- final (callback) {
282
- if (self.closing || self.ws === null) {
283
- callback()
284
- return
285
- }
286
- try {
287
- sendMessage({ type: "input_audio_buffer.commit" })
288
- self.ws.close()
289
- /* NOTICE: do not push null here -- let the OpenAI close event handle it */
290
- callback()
291
- }
292
- catch (error) {
293
- self.log("warning", `error closing OpenAI connection: ${error}`)
294
- callback(util.ensureError(error, "failed to close OpenAI connection"))
295
- }
324
+ }))
296
325
  }
297
326
  })
298
327
  }
@@ -316,11 +345,14 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
316
345
 
317
346
  /* close OpenAI connection */
318
347
  if (this.ws !== null) {
348
+ this.ws.removeAllListeners()
319
349
  this.ws.close()
320
350
  this.ws = null
321
351
  }
322
352
  if (this.openai !== null)
323
353
  this.openai = null
354
+ if (this.resampler !== null)
355
+ this.resampler = null
324
356
 
325
357
  /* shutdown stream */
326
358
  if (this.stream !== null) {
@@ -10,6 +10,7 @@ import Stream from "node:stream"
10
10
  /* external dependencies */
11
11
  import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
12
12
  import { getStreamAsBuffer } from "get-stream"
13
+ import { Duration } from "luxon"
13
14
  import SpeexResampler from "speex-resampler"
14
15
 
15
16
  /* internal dependencies */
@@ -140,58 +141,60 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
140
141
  readableObjectMode: true,
141
142
  decodeStrings: false,
142
143
  highWaterMark: 1,
143
- transform (chunk: SpeechFlowChunk, encoding, callback) {
144
+ async transform (chunk: SpeechFlowChunk, encoding, callback) {
144
145
  if (self.closing)
145
146
  callback(new Error("stream already destroyed"))
146
147
  else if (Buffer.isBuffer(chunk.payload))
147
148
  callback(new Error("invalid chunk payload type"))
148
149
  else {
149
- (async () => {
150
- let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
150
+ let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
151
+ processTimeout = null
152
+ callback(new Error("ElevenLabs API timeout"))
153
+ }, 60 * 1000)
154
+ const clearProcessTimeout = () => {
155
+ if (processTimeout !== null) {
156
+ clearTimeout(processTimeout)
151
157
  processTimeout = null
152
- callback(new Error("ElevenLabs API timeout"))
153
- }, 60 * 1000)
154
- const clearProcessTimeout = () => {
155
- if (processTimeout !== null) {
156
- clearTimeout(processTimeout)
157
- processTimeout = null
158
- }
159
158
  }
160
- try {
161
- if (self.closing) {
162
- clearProcessTimeout()
163
- callback(new Error("stream destroyed during processing"))
164
- return
165
- }
166
- const stream = await speechStream(chunk.payload as string)
167
- const buffer = await getStreamAsBuffer(stream)
168
- if (self.closing) {
169
- clearProcessTimeout()
170
- callback(new Error("stream destroyed during processing"))
171
- return
172
- }
173
- const bufferResampled = self.resampler!.processChunk(buffer)
174
- self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
175
- const chunkNew = chunk.clone()
176
- chunkNew.type = "audio"
177
- chunkNew.payload = bufferResampled
159
+ }
160
+ try {
161
+ if (self.closing) {
178
162
  clearProcessTimeout()
179
- this.push(chunkNew)
180
- callback()
163
+ callback(new Error("stream destroyed during processing"))
164
+ return
181
165
  }
182
- catch (error) {
166
+ const stream = await speechStream(chunk.payload as string)
167
+ const buffer = await getStreamAsBuffer(stream)
168
+ if (self.closing) {
183
169
  clearProcessTimeout()
184
- callback(util.ensureError(error, "ElevenLabs processing failed"))
170
+ callback(new Error("stream destroyed during processing"))
171
+ return
185
172
  }
186
- })()
173
+ self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
174
+ const bufferResampled = self.resampler!.processChunk(buffer)
175
+ self.log("info", "ElevenLabs: forwarding resampled audio " +
176
+ `(buffer length: ${bufferResampled.byteLength})`)
177
+
178
+ /* calculate actual audio duration from PCM buffer size */
179
+ const durationMs = util.audioBufferDuration(bufferResampled,
180
+ self.config.audioSampleRate, self.config.audioBitDepth) * 1000
181
+
182
+ /* create new chunk with recalculated timestamps */
183
+ const chunkNew = chunk.clone()
184
+ chunkNew.type = "audio"
185
+ chunkNew.payload = bufferResampled
186
+ chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
187
+ clearProcessTimeout()
188
+ this.push(chunkNew)
189
+ callback()
190
+ }
191
+ catch (error) {
192
+ clearProcessTimeout()
193
+ callback(util.ensureError(error, "ElevenLabs processing failed"))
194
+ }
187
195
  }
188
196
  },
189
197
  final (callback) {
190
- if (self.closing) {
191
- callback()
192
- return
193
- }
194
- this.push(null)
195
198
  callback()
196
199
  }
197
200
  })
@@ -142,7 +142,6 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
142
142
  }
143
143
  },
144
144
  final (callback) {
145
- this.push(null)
146
145
  callback()
147
146
  }
148
147
  })
@@ -98,7 +98,7 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
98
98
  if (!retriable || attempt >= maxRetries)
99
99
  break
100
100
  const delayMs = Math.min(1000 * Math.pow(2, attempt - 1), 5000)
101
- await new Promise((resolve) => setTimeout(resolve, delayMs))
101
+ await util.sleep(delayMs)
102
102
  }
103
103
  }
104
104
  throw util.ensureError(lastError)
@@ -129,7 +129,6 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
129
129
  }
130
130
  },
131
131
  final (callback) {
132
- this.push(null)
133
132
  callback()
134
133
  }
135
134
  })
@@ -100,7 +100,6 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
100
100
  }
101
101
  },
102
102
  final (callback) {
103
- this.push(null)
104
103
  callback()
105
104
  }
106
105
  })
@@ -64,7 +64,6 @@ export default class SpeechFlowNodeT2TFormat extends SpeechFlowNode {
64
64
  }
65
65
  },
66
66
  final (callback) {
67
- this.push(null)
68
67
  callback()
69
68
  }
70
69
  })
@@ -110,7 +110,6 @@ export default class SpeechFlowNodeT2TGoogle extends SpeechFlowNode {
110
110
  }
111
111
  },
112
112
  final (callback) {
113
- this.push(null)
114
113
  callback()
115
114
  }
116
115
  })
@@ -67,7 +67,6 @@ export default class SpeechFlowNodeT2TModify extends SpeechFlowNode {
67
67
  }
68
68
  },
69
69
  final (callback) {
70
- this.push(null)
71
70
  callback()
72
71
  }
73
72
  })
@@ -258,7 +258,6 @@ export default class SpeechFlowNodeT2TOllama extends SpeechFlowNode {
258
258
  }
259
259
  },
260
260
  final (callback) {
261
- this.push(null)
262
261
  callback()
263
262
  }
264
263
  })
@@ -226,7 +226,6 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
226
226
  }
227
227
  },
228
228
  final (callback) {
229
- this.push(null)
230
229
  callback()
231
230
  }
232
231
  })