speechflow 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +4 -4
  3. package/package.json +4 -4
  4. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  5. package/speechflow-cli/dst/speechflow-main-cli.js +1 -0
  6. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
  7. package/speechflow-cli/dst/speechflow-main-graph.js +2 -4
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-main-nodes.js +1 -0
  10. package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -0
  12. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +7 -9
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -0
  16. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  19. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -0
  20. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +11 -9
  25. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +1 -0
  27. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
  28. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +4 -2
  30. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  31. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +19 -22
  32. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  33. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +7 -0
  34. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  35. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
  36. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +2 -11
  37. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +0 -1
  39. package/speechflow-cli/dst/speechflow-node-a2t-google.js +0 -6
  40. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -1
  42. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
  44. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +27 -7
  45. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -3
  48. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-t2a-google.js +1 -4
  50. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
  51. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
  52. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +27 -6
  53. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  54. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +1 -4
  55. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
  56. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +2 -3
  57. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +97 -459
  58. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
  59. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +0 -2
  60. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  62. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  63. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +18 -16
  64. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
  65. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +2 -3
  66. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +2 -3
  68. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +5 -2
  70. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  71. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +2 -3
  72. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
  73. package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -2
  74. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -1
  75. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -0
  76. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  77. package/speechflow-cli/dst/speechflow-node-xio-exec.js +1 -0
  78. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
  79. package/speechflow-cli/dst/speechflow-node-xio-file.js +3 -5
  80. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  81. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
  83. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +2 -0
  84. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
  85. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +9 -9
  86. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  87. package/speechflow-cli/dst/speechflow-util-audio.js +4 -0
  88. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  89. package/speechflow-cli/dst/speechflow-util-llm.d.ts +0 -1
  90. package/speechflow-cli/dst/speechflow-util-llm.js +4 -8
  91. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
  92. package/speechflow-cli/dst/speechflow-util-queue.js +2 -1
  93. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  94. package/speechflow-cli/dst/speechflow-util.js +1 -0
  95. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  96. package/speechflow-cli/dst/test.d.ts +1 -0
  97. package/speechflow-cli/dst/test.js +18 -0
  98. package/speechflow-cli/dst/test.js.map +1 -0
  99. package/speechflow-cli/etc/oxlint.jsonc +3 -1
  100. package/speechflow-cli/package.json +16 -16
  101. package/speechflow-cli/src/speechflow-main-api.ts +16 -16
  102. package/speechflow-cli/src/speechflow-main-cli.ts +1 -0
  103. package/speechflow-cli/src/speechflow-main-graph.ts +7 -9
  104. package/speechflow-cli/src/speechflow-main-nodes.ts +1 -0
  105. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -0
  106. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -10
  107. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -0
  108. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +9 -10
  109. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -0
  110. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -3
  111. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
  112. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +11 -9
  113. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +1 -0
  114. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
  115. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +5 -3
  116. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +20 -23
  117. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +7 -0
  118. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -18
  119. package/speechflow-cli/src/speechflow-node-a2t-google.ts +4 -11
  120. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +12 -7
  121. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +32 -10
  122. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -4
  123. package/speechflow-cli/src/speechflow-node-t2a-google.ts +1 -4
  124. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +33 -10
  125. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +1 -4
  126. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +106 -571
  127. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -3
  128. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
  129. package/speechflow-cli/src/speechflow-node-t2t-google.ts +1 -1
  130. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +19 -18
  131. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +2 -3
  132. package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +2 -3
  133. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +5 -2
  134. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +2 -3
  135. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -2
  136. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -0
  137. package/speechflow-cli/src/speechflow-node-xio-exec.ts +1 -0
  138. package/speechflow-cli/src/speechflow-node-xio-file.ts +3 -5
  139. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
  140. package/speechflow-cli/src/speechflow-node-xio-vban.ts +5 -5
  141. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +2 -0
  142. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
  143. package/speechflow-cli/src/speechflow-util-audio.ts +5 -0
  144. package/speechflow-cli/src/speechflow-util-llm.ts +4 -9
  145. package/speechflow-cli/src/speechflow-util-queue.ts +4 -4
  146. package/speechflow-cli/src/speechflow-util.ts +1 -0
  147. package/speechflow-ui-db/dst/index.js +14 -14
  148. package/speechflow-ui-db/package.json +6 -6
  149. package/speechflow-ui-st/dst/index.js +32 -32
  150. package/speechflow-ui-st/package.json +6 -6
@@ -85,6 +85,18 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
85
85
  }
86
86
  }
87
87
 
88
+ /* helper function for tail timer handling */
89
+ const startTailTimer = () => {
90
+ tail = true
91
+ clearTailTimer()
92
+ this.tailTimer = setTimeout(() => {
93
+ if (this.closing || this.tailTimer === null)
94
+ return
95
+ tail = false
96
+ this.tailTimer = null
97
+ }, this.params.postSpeechTail)
98
+ }
99
+
88
100
  /* establish Voice Activity Detection (VAD) facility */
89
101
  let tail = false
90
102
  try {
@@ -111,31 +123,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
111
123
  return
112
124
  const duration = util.audioArrayDuration(audio, vadSampleRateTarget)
113
125
  this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
114
- if (this.params.mode === "unplugged") {
115
- tail = true
116
- clearTailTimer()
117
- this.tailTimer = setTimeout(() => {
118
- if (this.closing || this.tailTimer === null)
119
- return
120
- tail = false
121
- this.tailTimer = null
122
- }, this.params.postSpeechTail)
123
- }
126
+ if (this.params.mode === "unplugged")
127
+ startTailTimer()
124
128
  },
125
129
  onVADMisfire: () => {
126
130
  if (this.closing)
127
131
  return
128
132
  this.log("info", "VAD: speech end (segment too short)")
129
- if (this.params.mode === "unplugged") {
130
- tail = true
131
- clearTailTimer()
132
- this.tailTimer = setTimeout(() => {
133
- if (this.closing || this.tailTimer === null)
134
- return
135
- tail = false
136
- this.tailTimer = null
137
- }, this.params.postSpeechTail)
138
- }
133
+ if (this.params.mode === "unplugged")
134
+ startTailTimer()
139
135
  },
140
136
  onFrameProcessed: (audio) => {
141
137
  if (this.closing)
@@ -144,7 +140,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
144
140
  /* annotate the current audio segment */
145
141
  const element = this.queueVAD.peek()
146
142
  if (element === undefined || element.type !== "audio-frame")
147
- throw new Error("internal error which cannot happen: no more queued element")
143
+ throw new Error("internal error that cannot happen: no more queued element")
148
144
  if (element.segmentIdx >= element.segmentData.length)
149
145
  throw new Error("segment index out of bounds")
150
146
  const segment = element.segmentData[element.segmentIdx++]
@@ -227,6 +223,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
227
223
  }
228
224
  }
229
225
 
226
+ /* signal completion */
230
227
  callback()
231
228
  }
232
229
  catch (error) {
@@ -322,6 +319,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
322
319
  }
323
320
  }
324
321
 
322
+ /* peek at send queue element */
325
323
  const element = self.queueSend.peek()
326
324
  if (element !== undefined && element.type === "audio-eof")
327
325
  this.push(null)
@@ -371,8 +369,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
371
369
  if (this.vad !== null) {
372
370
  try {
373
371
  const flushPromise = this.vad.flush()
374
- const timeoutPromise = new Promise((resolve) =>
375
- setTimeout(resolve, 5000))
372
+ const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
376
373
  await Promise.race([ flushPromise, timeoutPromise ])
377
374
  }
378
375
  catch (error) {
@@ -21,15 +21,18 @@ const writeWavHeader = (
21
21
  const sampleRate = options?.sampleRate ?? 44100 /* 44KHz */
22
22
  const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
23
23
 
24
+ /* determine header dimensions */
24
25
  const headerLength = 44
25
26
  const maxDataSize = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
26
27
  const dataLength = length ?? maxDataSize
27
28
  const fileSize = dataLength + headerLength
28
29
  const header = Buffer.alloc(headerLength)
29
30
 
31
+ /* calculate byte rate and block alignment */
30
32
  const byteRate = (sampleRate * channels * bitDepth) / 8
31
33
  const blockAlign = (channels * bitDepth) / 8
32
34
 
35
+ /* write header fields */
33
36
  let offset = 0
34
37
  header.write("RIFF", offset); offset += 4
35
38
  header.writeUInt32LE(fileSize - 8, offset); offset += 4
@@ -45,6 +48,7 @@ const writeWavHeader = (
45
48
  header.write("data", offset); offset += 4
46
49
  header.writeUInt32LE(dataLength, offset); offset += 4
47
50
 
51
+ /* return completed header */
48
52
  return header
49
53
  }
50
54
 
@@ -53,6 +57,7 @@ const readWavHeader = (buffer: Buffer) => {
53
57
  if (buffer.length < 44)
54
58
  throw new Error("WAV header too short, expected at least 44 bytes")
55
59
 
60
+ /* read header fields */
56
61
  let offset = 0
57
62
  const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
58
63
  const fileSize = buffer.readUInt32LE(offset); offset += 4
@@ -68,6 +73,7 @@ const readWavHeader = (buffer: Buffer) => {
68
73
  const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
69
74
  const dataLength = buffer.readUInt32LE(offset); offset += 4
70
75
 
76
+ /* validate RIFF header */
71
77
  if (riffHead !== "RIFF")
72
78
  throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
73
79
  if (waveHead !== "WAVE")
@@ -77,6 +83,7 @@ const readWavHeader = (buffer: Buffer) => {
77
83
  if (data !== "data")
78
84
  throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
79
85
 
86
+ /* return parsed header data */
80
87
  return {
81
88
  riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
82
89
  channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
@@ -53,7 +53,7 @@ class AsyncQueue<T> {
53
53
  continue
54
54
  }
55
55
  else {
56
- const it = await new Promise<IteratorResult<T>>((resolve) => this.resolvers.push(resolve))
56
+ const it = await new Promise<IteratorResult<T>>((resolve) => { this.resolvers.push(resolve) })
57
57
  if (it.done)
58
58
  return
59
59
  yield it.value
@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
68
68
  public static name = "a2t-amazon"
69
69
 
70
70
  /* internal state */
71
- private client: TranscribeStreamingClient | null = null
72
- private clientStream: AsyncIterable<TranscriptResultStream> | null = null
73
- private closing = false
74
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
75
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
71
+ private client: TranscribeStreamingClient | null = null
72
+ private clientStream: AsyncIterable<TranscriptResultStream> | null = null
73
+ private closing = false
74
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
76
75
 
77
76
  /* construct node */
78
77
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -126,8 +125,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
126
125
  secretAccessKey: this.params.secKey
127
126
  }
128
127
  })
129
- if (this.client === null)
130
- throw new Error("failed to establish Amazon Transcribe client")
131
128
 
132
129
  /* create an AudioStream for Amazon Transcribe */
133
130
  const audioQueue = new AsyncQueue<Uint8Array>()
@@ -236,11 +233,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
236
233
  callback()
237
234
  return
238
235
  }
239
-
240
- /* await all read operations */
241
236
  await reads.awaitAll()
242
-
243
- util.run(
237
+ util.run("closing Amazon Transcribe connection",
244
238
  () => self.client!.destroy(),
245
239
  (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
246
240
  )
@@ -279,12 +273,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
279
273
  /* indicate closing first to stop all async operations */
280
274
  this.closing = true
281
275
 
282
- /* cleanup all timers */
283
- if (this.connectionTimeout !== null) {
284
- clearTimeout(this.connectionTimeout)
285
- this.connectionTimeout = null
286
- }
287
-
288
276
  /* close queue */
289
277
  if (this.queue !== null) {
290
278
  this.queue.write(null)
@@ -22,11 +22,10 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
22
22
  public static name = "a2t-google"
23
23
 
24
24
  /* internal state */
25
- private client: GoogleSpeech.SpeechClient | null = null
26
- private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
27
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
28
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
- private closing = false
25
+ private client: GoogleSpeech.SpeechClient | null = null
26
+ private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
27
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
28
+ private closing = false
30
29
 
31
30
  /* construct node */
32
31
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -280,12 +279,6 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
280
279
  /* indicate closing first to stop all async operations */
281
280
  this.closing = true
282
281
 
283
- /* cleanup all timers */
284
- if (this.connectionTimeout !== null) {
285
- clearTimeout(this.connectionTimeout)
286
- this.connectionTimeout = null
287
- }
288
-
289
282
  /* shutdown stream */
290
283
  if (this.stream !== null) {
291
284
  await util.destroyStream(this.stream)
@@ -23,12 +23,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
23
23
  public static name = "a2t-openai"
24
24
 
25
25
  /* internal state */
26
- private openai: OpenAI | null = null
27
- private ws: ws.WebSocket | null = null
28
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
- private resampler: SpeexResampler | null = null
30
- private closing = false
31
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
26
+ private openai: OpenAI | null = null
27
+ private ws: ws.WebSocket | null = null
28
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
+ private resampler: SpeexResampler | null = null
30
+ private closing = false
31
+ private connectionTimeout: ReturnType<typeof setTimeout> | null = null
32
32
 
33
33
  /* construct node */
34
34
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -150,6 +150,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
150
150
  })
151
151
  this.ws.on("error", (err) => {
152
152
  this.log("error", `WebSocket connection error: ${err}`)
153
+ if (!this.closing && this.queue !== null)
154
+ this.queue.write(null)
155
+ this.emit("error", err)
153
156
  })
154
157
 
155
158
  /* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
@@ -164,6 +167,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
164
167
  }, new Map<string, any>())
165
168
  }
166
169
 
170
+ /* track transcription text */
167
171
  let text = ""
168
172
  this.ws.on("message", (data) => {
169
173
  let ev: any
@@ -353,7 +357,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
353
357
  this.ws.close()
354
358
  this.ws = null
355
359
  }
356
- this.openai = null
360
+ if (this.openai !== null)
361
+ this.openai = null
357
362
 
358
363
  /* close resampler */
359
364
  this.resampler = null
@@ -9,6 +9,7 @@ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import { getStreamAsBuffer } from "get-stream"
12
+ import { Duration } from "luxon"
12
13
  import SpeexResampler from "speex-resampler"
13
14
  import {
14
15
  PollyClient, SynthesizeSpeechCommand,
@@ -25,9 +26,9 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
25
26
  public static name = "t2a-amazon"
26
27
 
27
28
  /* internal state */
28
- private client: PollyClient | null = null
29
- private closing = false
29
+ private client: PollyClient | null = null
30
30
  private resampler: SpeexResampler | null = null
31
+ private closing = false
31
32
 
32
33
  /* construct node */
33
34
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -129,22 +130,43 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
129
130
  }
130
131
  if (Buffer.isBuffer(chunk.payload))
131
132
  callback(new Error("invalid chunk payload type"))
132
- else if (chunk.payload.length > 0) {
133
+ else if (chunk.payload === "")
134
+ callback()
135
+ else {
136
+ let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
137
+ processTimeout = null
138
+ callback(new Error("AWS Polly API timeout"))
139
+ }, 60 * 1000)
140
+ const clearProcessTimeout = () => {
141
+ if (processTimeout !== null) {
142
+ clearTimeout(processTimeout)
143
+ processTimeout = null
144
+ }
145
+ }
133
146
  self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
134
147
  textToSpeech(chunk.payload as string).then((buffer) => {
135
- if (self.closing)
136
- throw new Error("stream destroyed during processing")
148
+ if (self.closing) {
149
+ clearProcessTimeout()
150
+ callback(new Error("stream destroyed during processing"))
151
+ return
152
+ }
153
+ /* calculate actual audio duration from PCM buffer size */
154
+ const durationMs = util.audioBufferDuration(buffer,
155
+ self.config.audioSampleRate, self.config.audioBitDepth) * 1000
156
+
157
+ /* create new chunk with recalculated timestamps */
137
158
  const chunkNew = chunk.clone()
138
- chunkNew.type = "audio"
139
- chunkNew.payload = buffer
159
+ chunkNew.type = "audio"
160
+ chunkNew.payload = buffer
161
+ chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
162
+ clearProcessTimeout()
140
163
  this.push(chunkNew)
141
164
  callback()
142
165
  }).catch((error: unknown) => {
143
- callback(util.ensureError(error, "failed to send to AWS Polly"))
166
+ clearProcessTimeout()
167
+ callback(util.ensureError(error, "AWS Polly processing failed"))
144
168
  })
145
169
  }
146
- else
147
- callback()
148
170
  },
149
171
  final (callback) {
150
172
  callback()
@@ -24,8 +24,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
24
24
 
25
25
  /* internal state */
26
26
  private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
27
- private closing = false
28
- private resampler: SpeexResampler | null = null
27
+ private resampler: SpeexResampler | null = null
28
+ private closing = false
29
29
 
30
30
  /* construct node */
31
31
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -131,8 +131,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
131
131
  })
132
132
  }
133
133
 
134
- /* establish resampler from ElevenLabs's maximum 24Khz
135
- output to our standard audio sample rate (48KHz) */
134
+ /* establish resampler from ElevenLabs's tier-dependent
135
+ output sample rate to our standard audio sample rate (48KHz) */
136
136
  this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
137
137
 
138
138
  /* create transform stream and connect it to the ElevenLabs API */
@@ -147,6 +147,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
147
147
  callback(new Error("stream already destroyed"))
148
148
  else if (Buffer.isBuffer(chunk.payload))
149
149
  callback(new Error("invalid chunk payload type"))
150
+ else if (chunk.payload === "")
151
+ callback()
150
152
  else {
151
153
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
152
154
  processTimeout = null
@@ -126,11 +126,8 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
126
126
  callback(new Error("stream already destroyed"))
127
127
  else if (Buffer.isBuffer(chunk.payload))
128
128
  callback(new Error("invalid chunk payload type"))
129
- else if (chunk.payload === "") {
130
- /* pass through empty chunks */
131
- this.push(chunk)
129
+ else if (chunk.payload === "")
132
130
  callback()
133
- }
134
131
  else {
135
132
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
136
133
  processTimeout = null
@@ -9,6 +9,7 @@ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import { KokoroTTS } from "kokoro-js"
12
+ import { Duration } from "luxon"
12
13
  import SpeexResampler from "speex-resampler"
13
14
 
14
15
  /* internal dependencies */
@@ -21,9 +22,9 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
21
22
  public static name = "t2a-kokoro"
22
23
 
23
24
  /* internal state */
24
- private kokoro: KokoroTTS | null = null
25
- private closing = false
25
+ private kokoro: KokoroTTS | null = null
26
26
  private resampler: SpeexResampler | null = null
27
+ private closing = false
27
28
 
28
29
  /* construct node */
29
30
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -122,9 +123,7 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
122
123
  }
123
124
 
124
125
  /* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz */
125
- const buffer2 = this.resampler!.processChunk(buffer1)
126
-
127
- return buffer2
126
+ return this.resampler!.processChunk(buffer1)
128
127
  }
129
128
 
130
129
  /* create transform stream and connect it to the Kokoro API */
@@ -139,18 +138,42 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
139
138
  callback(new Error("stream already destroyed"))
140
139
  else if (Buffer.isBuffer(chunk.payload))
141
140
  callback(new Error("invalid chunk payload type"))
141
+ else if (chunk.payload === "")
142
+ callback()
142
143
  else {
144
+ let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
145
+ processTimeout = null
146
+ callback(new Error("Kokoro TTS timeout"))
147
+ }, 60 * 1000)
148
+ const clearProcessTimeout = () => {
149
+ if (processTimeout !== null) {
150
+ clearTimeout(processTimeout)
151
+ processTimeout = null
152
+ }
153
+ }
143
154
  text2speech(chunk.payload).then((buffer) => {
144
- if (self.closing)
145
- throw new Error("stream destroyed during processing")
155
+ if (self.closing) {
156
+ clearProcessTimeout()
157
+ callback(new Error("stream destroyed during processing"))
158
+ return
159
+ }
146
160
  self.log("info", `Kokoro: received audio (buffer length: ${buffer.byteLength})`)
161
+
162
+ /* calculate actual audio duration from PCM buffer size */
163
+ const durationMs = util.audioBufferDuration(buffer,
164
+ self.config.audioSampleRate, self.config.audioBitDepth) * 1000
165
+
166
+ /* create new chunk with recalculated timestamps */
147
167
  const chunkNew = chunk.clone()
148
- chunkNew.type = "audio"
149
- chunkNew.payload = buffer
168
+ chunkNew.type = "audio"
169
+ chunkNew.payload = buffer
170
+ chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
171
+ clearProcessTimeout()
150
172
  this.push(chunkNew)
151
173
  callback()
152
174
  }).catch((error: unknown) => {
153
- callback(util.ensureError(error))
175
+ clearProcessTimeout()
176
+ callback(util.ensureError(error, "Kokoro processing failed"))
154
177
  })
155
178
  }
156
179
  },
@@ -103,11 +103,8 @@ export default class SpeechFlowNodeT2AOpenAI extends SpeechFlowNode {
103
103
  callback(new Error("stream already destroyed"))
104
104
  else if (Buffer.isBuffer(chunk.payload))
105
105
  callback(new Error("invalid chunk payload type"))
106
- else if (chunk.payload === "") {
107
- /* pass through empty chunks */
108
- this.push(chunk)
106
+ else if (chunk.payload === "")
109
107
  callback()
110
- }
111
108
  else {
112
109
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
113
110
  processTimeout = null