speechflow 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. package/{etc/claude.md → AGENTS.md} +8 -3
  2. package/CHANGELOG.md +70 -1
  3. package/README.md +28 -4
  4. package/etc/speechflow.yaml +3 -1
  5. package/etc/stx.conf +1 -1
  6. package/package.json +6 -6
  7. package/speechflow-cli/dst/speechflow-main-api.d.ts +2 -1
  8. package/speechflow-cli/dst/speechflow-main-api.js +57 -16
  9. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  10. package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
  11. package/speechflow-cli/dst/speechflow-main-config.js +1 -1
  12. package/speechflow-cli/dst/speechflow-main-graph.js +55 -21
  13. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  14. package/speechflow-cli/dst/speechflow-main-nodes.js +1 -1
  15. package/speechflow-cli/dst/speechflow-main-status.js +6 -3
  16. package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-main.js +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +7 -10
  19. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  20. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +8 -6
  21. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +9 -5
  23. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +6 -5
  25. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
  27. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  28. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -4
  29. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +1 -1
  31. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +20 -12
  32. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  33. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +1 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +33 -11
  35. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
  37. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +4 -3
  39. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  40. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +2 -2
  41. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
  42. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +19 -11
  43. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  44. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +8 -8
  45. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +33 -29
  47. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  48. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +6 -5
  49. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  50. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +2 -1
  51. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +34 -20
  52. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  53. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +13 -5
  54. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-a2t-google.js +3 -2
  56. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
  57. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +33 -27
  58. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  59. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +16 -5
  60. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +17 -5
  62. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  63. package/speechflow-cli/dst/speechflow-node-t2a-google.js +17 -5
  64. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
  65. package/speechflow-cli/dst/speechflow-node-t2a-kitten.d.ts +15 -0
  66. package/speechflow-cli/dst/speechflow-node-t2a-kitten.js +194 -0
  67. package/speechflow-cli/dst/speechflow-node-t2a-kitten.js.map +1 -0
  68. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +21 -9
  69. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  70. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +17 -5
  71. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
  72. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +21 -7
  73. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
  74. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -1
  75. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
  76. package/speechflow-cli/dst/speechflow-node-t2t-format.js +1 -1
  77. package/speechflow-cli/dst/speechflow-node-t2t-google.js +4 -2
  78. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  79. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +1 -1
  80. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +1 -1
  81. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +1 -1
  82. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
  83. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +1 -1
  84. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
  85. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +34 -14
  86. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  87. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +3 -3
  88. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
  89. package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
  90. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +3 -2
  91. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  92. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +1 -1
  93. package/speechflow-cli/dst/speechflow-node-xio-device.js +18 -7
  94. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  95. package/speechflow-cli/dst/speechflow-node-xio-exec.js +23 -11
  96. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
  97. package/speechflow-cli/dst/speechflow-node-xio-file.js +13 -7
  98. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  99. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +25 -12
  100. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  101. package/speechflow-cli/dst/speechflow-node-xio-vban.js +32 -20
  102. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
  103. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +78 -62
  104. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
  105. package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -0
  106. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +63 -18
  107. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  108. package/speechflow-cli/dst/speechflow-node.js +5 -7
  109. package/speechflow-cli/dst/speechflow-node.js.map +1 -1
  110. package/speechflow-cli/dst/speechflow-util-audio-wt.js +31 -5
  111. package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -1
  112. package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -1
  113. package/speechflow-cli/dst/speechflow-util-audio.js +25 -14
  114. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  115. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
  116. package/speechflow-cli/dst/speechflow-util-error.js +2 -2
  117. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  118. package/speechflow-cli/dst/speechflow-util-llm.js +1 -1
  119. package/speechflow-cli/dst/speechflow-util-misc.d.ts +3 -2
  120. package/speechflow-cli/dst/speechflow-util-misc.js +63 -6
  121. package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
  122. package/speechflow-cli/dst/speechflow-util-queue.d.ts +5 -17
  123. package/speechflow-cli/dst/speechflow-util-queue.js +57 -78
  124. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  125. package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
  126. package/speechflow-cli/dst/speechflow-util-stream.js +35 -8
  127. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  128. package/speechflow-cli/dst/speechflow-util.js +1 -1
  129. package/speechflow-cli/dst/speechflow.d.ts +1 -1
  130. package/speechflow-cli/dst/speechflow.js +1 -1
  131. package/speechflow-cli/etc/eslint.mjs +1 -1
  132. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  133. package/speechflow-cli/etc/stx.conf +8 -2
  134. package/speechflow-cli/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +2 -1
  135. package/speechflow-cli/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
  136. package/speechflow-cli/package.d/kitten-tts-js+0.1.2.patch +24 -0
  137. package/speechflow-cli/package.d/speex-resampler+3.0.1.patch +56 -0
  138. package/speechflow-cli/package.json +40 -30
  139. package/speechflow-cli/src/lib.d.ts +19 -1
  140. package/speechflow-cli/src/speechflow-main-api.ts +64 -19
  141. package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
  142. package/speechflow-cli/src/speechflow-main-config.ts +1 -1
  143. package/speechflow-cli/src/speechflow-main-graph.ts +56 -22
  144. package/speechflow-cli/src/speechflow-main-nodes.ts +1 -1
  145. package/speechflow-cli/src/speechflow-main-status.ts +6 -3
  146. package/speechflow-cli/src/speechflow-main.ts +1 -1
  147. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +7 -11
  148. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -6
  149. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +10 -5
  150. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +6 -5
  151. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +3 -2
  152. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -4
  153. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +1 -1
  154. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +20 -13
  155. package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +1 -1
  156. package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +43 -16
  157. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +1 -1
  158. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +1 -1
  159. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +4 -3
  160. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +2 -2
  161. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +24 -12
  162. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +10 -9
  163. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +38 -31
  164. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +6 -5
  165. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +35 -22
  166. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +17 -6
  167. package/speechflow-cli/src/speechflow-node-a2t-google.ts +5 -4
  168. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +39 -31
  169. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +16 -5
  170. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +17 -5
  171. package/speechflow-cli/src/speechflow-node-t2a-google.ts +17 -5
  172. package/speechflow-cli/src/speechflow-node-t2a-kitten.ts +178 -0
  173. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +21 -9
  174. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +17 -5
  175. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +21 -7
  176. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -1
  177. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
  178. package/speechflow-cli/src/speechflow-node-t2t-format.ts +1 -1
  179. package/speechflow-cli/src/speechflow-node-t2t-google.ts +4 -2
  180. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +1 -1
  181. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +1 -1
  182. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +1 -1
  183. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
  184. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +1 -1
  185. package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
  186. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +39 -15
  187. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +3 -3
  188. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
  189. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -3
  190. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +1 -1
  191. package/speechflow-cli/src/speechflow-node-xio-device.ts +21 -7
  192. package/speechflow-cli/src/speechflow-node-xio-exec.ts +25 -11
  193. package/speechflow-cli/src/speechflow-node-xio-file.ts +15 -7
  194. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +28 -15
  195. package/speechflow-cli/src/speechflow-node-xio-vban.ts +35 -22
  196. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +85 -69
  197. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +67 -20
  198. package/speechflow-cli/src/speechflow-node.ts +7 -8
  199. package/speechflow-cli/src/speechflow-util-audio-wt.ts +46 -7
  200. package/speechflow-cli/src/speechflow-util-audio.ts +27 -15
  201. package/speechflow-cli/src/speechflow-util-error.ts +3 -3
  202. package/speechflow-cli/src/speechflow-util-llm.ts +1 -1
  203. package/speechflow-cli/src/speechflow-util-misc.ts +63 -6
  204. package/speechflow-cli/src/speechflow-util-queue.ts +60 -81
  205. package/speechflow-cli/src/speechflow-util-stream.ts +40 -8
  206. package/speechflow-cli/src/speechflow-util.ts +1 -1
  207. package/speechflow-cli/src/speechflow.ts +1 -1
  208. package/speechflow-ui-db/dst/index.html +1 -1
  209. package/speechflow-ui-db/dst/index.js +15 -15
  210. package/speechflow-ui-db/etc/eslint.mjs +1 -1
  211. package/speechflow-ui-db/etc/oxlint.jsonc +1 -1
  212. package/speechflow-ui-db/etc/stx.conf +1 -1
  213. package/speechflow-ui-db/etc/stylelint.js +1 -1
  214. package/speechflow-ui-db/etc/stylelint.yaml +1 -1
  215. package/speechflow-ui-db/etc/vite-client.mts +1 -1
  216. package/speechflow-ui-db/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
  217. package/speechflow-ui-db/package.json +22 -16
  218. package/speechflow-ui-db/src/app.styl +1 -1
  219. package/speechflow-ui-db/src/app.vue +1 -1
  220. package/speechflow-ui-db/src/index.html +1 -1
  221. package/speechflow-ui-db/src/index.ts +1 -1
  222. package/speechflow-ui-st/dst/index.html +1 -1
  223. package/speechflow-ui-st/dst/index.js +31 -31
  224. package/speechflow-ui-st/etc/eslint.mjs +1 -1
  225. package/speechflow-ui-st/etc/oxlint.jsonc +1 -1
  226. package/speechflow-ui-st/etc/stx.conf +1 -1
  227. package/speechflow-ui-st/etc/stylelint.js +1 -1
  228. package/speechflow-ui-st/etc/stylelint.yaml +1 -1
  229. package/speechflow-ui-st/etc/vite-client.mts +1 -1
  230. package/speechflow-ui-st/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
  231. package/speechflow-ui-st/package.json +23 -17
  232. package/speechflow-ui-st/src/app.styl +1 -1
  233. package/speechflow-ui-st/src/app.vue +1 -1
  234. package/speechflow-ui-st/src/index.html +1 -1
  235. package/speechflow-ui-st/src/index.ts +1 -1
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** SpeechFlow - Speech Processing Flow Graph
3
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
3
+ ** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
4
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
5
  */
6
6
 
@@ -25,7 +25,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
25
25
  /* internal state */
26
26
  private openai: OpenAI | null = null
27
27
  private ws: ws.WebSocket | null = null
28
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
28
+ private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
29
29
  private resampler: SpeexResampler | null = null
30
30
  private closing = false
31
31
  private connectionTimeout: ReturnType<typeof setTimeout> | null = null
@@ -67,7 +67,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
67
67
  this.closing = false
68
68
 
69
69
  /* create queue for results */
70
- this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
70
+ this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
71
71
 
72
72
  /* create a store for the meta information */
73
73
  const metastore = new util.TimeStore<Map<string, any>>()
@@ -139,10 +139,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
139
139
  })
140
140
 
141
141
  /* hook onto session events */
142
- this.ws.on("open", () => {
143
- this.log("info", "WebSocket connection opened")
144
- sendMessage({ type: "transcription.create" })
145
- })
146
142
  this.ws.on("close", () => {
147
143
  this.log("info", "WebSocket connection closed")
148
144
  if (!this.closing && this.queue !== null)
@@ -167,8 +163,11 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
167
163
  }, new Map<string, any>())
168
164
  }
169
165
 
170
- /* track transcription text */
171
- let text = ""
166
+ /* remember opening time to receive time zero offset */
167
+ this.timeOpen = DateTime.now()
168
+
169
+ /* track transcription text per item */
170
+ const textByItem = new Map<string, string>()
172
171
  this.ws.on("message", (data) => {
173
172
  let ev: Record<string, unknown>
174
173
  try {
@@ -186,13 +185,16 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
186
185
  case "transcription_session.created":
187
186
  break
188
187
  case "conversation.item.created": {
189
- text = ""
188
+ const itemId = (ev.item as Record<string, unknown>)?.id as string
189
+ if (itemId)
190
+ textByItem.set(itemId, "")
190
191
  break
191
192
  }
192
193
  case "conversation.item.input_audio_transcription.delta": {
193
- text += ev.delta as string
194
+ const itemId = ev.item_id as string
195
+ const text = (textByItem.get(itemId) ?? "") + (ev.delta as string)
196
+ textByItem.set(itemId, text)
194
197
  if (this.params.interim && !this.closing && this.queue !== null) {
195
- const itemId = ev.item_id as string
196
198
  const timing = speechTiming.get(itemId)
197
199
  const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
198
200
  const end = timing !== undefined ? Duration.fromMillis(timing.endMs) : start
@@ -204,7 +206,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
204
206
  }
205
207
  case "conversation.item.input_audio_transcription.completed": {
206
208
  if (!this.closing && this.queue !== null) {
207
- text = ev.transcript as string
209
+ const text = ev.transcript as string
208
210
  const itemId = ev.item_id as string
209
211
  const timing = speechTiming.get(itemId)
210
212
  const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
@@ -213,8 +215,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
213
215
  chunk.meta = aggregateMeta(start, end)
214
216
  metastore.prune(start)
215
217
  speechTiming.delete(itemId)
218
+ textByItem.delete(itemId)
216
219
  this.queue.write(chunk)
217
- text = ""
218
220
  }
219
221
  break
220
222
  }
@@ -248,9 +250,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
248
250
  }
249
251
  })
250
252
 
251
- /* remember opening time to receive time zero offset */
252
- this.timeOpen = DateTime.now()
253
-
254
253
  /* provide Duplex stream and internally attach to OpenAI API */
255
254
  const self = this
256
255
  const reads = new util.PromiseSet<void>()
@@ -260,7 +259,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
260
259
  decodeStrings: false,
261
260
  highWaterMark: 1,
262
261
  write (chunk: SpeechFlowChunk, encoding, callback) {
263
- if (self.closing || self.ws === null) {
262
+ if (self.closing || self.ws === null || self.resampler === null) {
264
263
  callback(new Error("stream already destroyed"))
265
264
  return
266
265
  }
@@ -274,7 +273,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
274
273
  if (chunk.meta.size > 0)
275
274
  metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
276
275
  try {
277
- const payload = self.resampler!.processChunk(chunk.payload)
276
+ const payload = self.resampler.processChunk(chunk.payload)
278
277
  const audioB64 = payload.toString("base64")
279
278
  sendMessage({
280
279
  type: "input_audio_buffer.append",
@@ -296,17 +295,23 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
296
295
  }
297
296
  try {
298
297
  sendMessage({ type: "input_audio_buffer.commit" })
299
- self.ws.close()
300
- await util.sleep(50)
298
+ self.ws?.close()
299
+ await new Promise<void>((resolve) => {
300
+ const timeout = setTimeout(() => { resolve() }, 5000)
301
+ self.ws?.once("close", () => {
302
+ clearTimeout(timeout)
303
+ resolve()
304
+ })
305
+ })
301
306
  }
302
307
  catch (error) {
303
308
  self.log("warning", `error closing OpenAI connection: ${error}`)
304
309
  }
310
+
311
+ /* await all read operations */
305
312
  await reads.awaitAll()
306
- const chunks: Array<SpeechFlowChunk | null> = self.queue?.drain() ?? []
307
- for (const chunk of chunks)
308
- this.push(chunk)
309
- this.push(null)
313
+
314
+ /* NOTICE: do not push null here -- let the WebSocket close event handle it */
310
315
  callback()
311
316
  },
312
317
  read (size) {
@@ -346,6 +351,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
346
351
  this.connectionTimeout = null
347
352
  }
348
353
 
354
+ /* shutdown stream */
355
+ if (this.stream !== null) {
356
+ await util.destroyStream(this.stream)
357
+ this.stream = null
358
+ }
359
+
349
360
  /* signal EOF to any pending read operations */
350
361
  if (this.queue !== null) {
351
362
  this.queue.write(null)
@@ -362,12 +373,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
362
373
  this.openai = null
363
374
 
364
375
  /* close resampler */
365
- this.resampler = null
366
-
367
- /* shutdown stream */
368
- if (this.stream !== null) {
369
- await util.destroyStream(this.stream)
370
- this.stream = null
376
+ if (this.resampler !== null) {
377
+ this.resampler.destroy()
378
+ this.resampler = null
371
379
  }
372
380
  }
373
381
  }
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** SpeechFlow - Speech Processing Flow Graph
3
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
3
+ ** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
4
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
5
  */
6
6
 
@@ -131,9 +131,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
131
131
  else if (chunk.payload === "")
132
132
  callback()
133
133
  else {
134
+ let callbackCalled = false
134
135
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
135
136
  processTimeout = null
136
- callback(new Error("AWS Polly API timeout"))
137
+ if (!callbackCalled) {
138
+ callbackCalled = true
139
+ callback(new Error("AWS Polly API timeout"))
140
+ }
137
141
  }, 60 * 1000)
138
142
  const clearProcessTimeout = () => {
139
143
  if (processTimeout !== null) {
@@ -143,8 +147,11 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
143
147
  }
144
148
  self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
145
149
  textToSpeech(chunk.payload as string).then((buffer) => {
150
+ clearProcessTimeout()
151
+ if (callbackCalled)
152
+ return
153
+ callbackCalled = true
146
154
  if (self.closing) {
147
- clearProcessTimeout()
148
155
  callback(new Error("stream destroyed during processing"))
149
156
  return
150
157
  }
@@ -157,11 +164,13 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
157
164
  chunkNew.type = "audio"
158
165
  chunkNew.payload = buffer
159
166
  chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
160
- clearProcessTimeout()
161
167
  this.push(chunkNew)
162
168
  callback()
163
169
  }).catch((error: unknown) => {
164
170
  clearProcessTimeout()
171
+ if (callbackCalled)
172
+ return
173
+ callbackCalled = true
165
174
  callback(util.ensureError(error, "AWS Polly processing failed"))
166
175
  })
167
176
  }
@@ -184,8 +193,10 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
184
193
  }
185
194
 
186
195
  /* destroy resampler */
187
- if (this.resampler !== null)
196
+ if (this.resampler !== null) {
197
+ this.resampler.destroy()
188
198
  this.resampler = null
199
+ }
189
200
 
190
201
  /* destroy AWS Polly API */
191
202
  if (this.client !== null) {
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** SpeechFlow - Speech Processing Flow Graph
3
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
3
+ ** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
4
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
5
  */
6
6
 
@@ -150,9 +150,13 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
150
150
  else if (chunk.payload === "")
151
151
  callback()
152
152
  else {
153
+ let callbackCalled = false
153
154
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
154
155
  processTimeout = null
155
- callback(new Error("ElevenLabs API timeout"))
156
+ if (!callbackCalled) {
157
+ callbackCalled = true
158
+ callback(new Error("ElevenLabs API timeout"))
159
+ }
156
160
  }, 60 * 1000)
157
161
  const clearProcessTimeout = () => {
158
162
  if (processTimeout !== null) {
@@ -163,13 +167,17 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
163
167
  try {
164
168
  if (self.closing) {
165
169
  clearProcessTimeout()
170
+ callbackCalled = true
166
171
  callback(new Error("stream destroyed during processing"))
167
172
  return
168
173
  }
169
174
  const stream = await speechStream(chunk.payload as string)
170
175
  const buffer = await getStreamAsBuffer(stream)
176
+ clearProcessTimeout()
177
+ if (callbackCalled)
178
+ return
179
+ callbackCalled = true
171
180
  if (self.closing) {
172
- clearProcessTimeout()
173
181
  callback(new Error("stream destroyed during processing"))
174
182
  return
175
183
  }
@@ -187,12 +195,14 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
187
195
  chunkNew.type = "audio"
188
196
  chunkNew.payload = bufferResampled
189
197
  chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
190
- clearProcessTimeout()
191
198
  this.push(chunkNew)
192
199
  callback()
193
200
  }
194
201
  catch (error) {
195
202
  clearProcessTimeout()
203
+ if (callbackCalled)
204
+ return
205
+ callbackCalled = true
196
206
  callback(util.ensureError(error, "ElevenLabs processing failed"))
197
207
  }
198
208
  }
@@ -215,8 +225,10 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
215
225
  }
216
226
 
217
227
  /* destroy resampler */
218
- if (this.resampler !== null)
228
+ if (this.resampler !== null) {
229
+ this.resampler.destroy()
219
230
  this.resampler = null
231
+ }
220
232
 
221
233
  /* destroy ElevenLabs API */
222
234
  if (this.elevenlabs !== null)
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** SpeechFlow - Speech Processing Flow Graph
3
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
3
+ ** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
4
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
5
  */
6
6
 
@@ -129,9 +129,13 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
129
129
  else if (chunk.payload === "")
130
130
  callback()
131
131
  else {
132
+ let callbackCalled = false
132
133
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
133
134
  processTimeout = null
134
- callback(new Error("Google TTS API timeout"))
135
+ if (!callbackCalled) {
136
+ callbackCalled = true
137
+ callback(new Error("Google TTS API timeout"))
138
+ }
135
139
  }, 60 * 1000)
136
140
  const clearProcessTimeout = () => {
137
141
  if (processTimeout !== null) {
@@ -142,12 +146,16 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
142
146
  try {
143
147
  if (self.closing) {
144
148
  clearProcessTimeout()
149
+ callbackCalled = true
145
150
  callback(new Error("stream destroyed during processing"))
146
151
  return
147
152
  }
148
153
  const buffer = await textToSpeech(chunk.payload as string)
154
+ clearProcessTimeout()
155
+ if (callbackCalled)
156
+ return
157
+ callbackCalled = true
149
158
  if (self.closing) {
150
- clearProcessTimeout()
151
159
  callback(new Error("stream destroyed during processing"))
152
160
  return
153
161
  }
@@ -161,12 +169,14 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
161
169
  chunkNew.type = "audio"
162
170
  chunkNew.payload = buffer
163
171
  chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
164
- clearProcessTimeout()
165
172
  this.push(chunkNew)
166
173
  callback()
167
174
  }
168
175
  catch (error) {
169
176
  clearProcessTimeout()
177
+ if (callbackCalled)
178
+ return
179
+ callbackCalled = true
170
180
  callback(util.ensureError(error, "Google TTS processing failed"))
171
181
  }
172
182
  }
@@ -189,8 +199,10 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
189
199
  }
190
200
 
191
201
  /* destroy resampler */
192
- if (this.resampler !== null)
202
+ if (this.resampler !== null) {
203
+ this.resampler.destroy()
193
204
  this.resampler = null
205
+ }
194
206
 
195
207
  /* destroy Google TTS client */
196
208
  if (this.client !== null) {
@@ -0,0 +1,178 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import { KittenTTS } from "kitten-tts-js"
12
+ import { Duration } from "luxon"
13
+ import SpeexResampler from "speex-resampler"
14
+
15
+ /* internal dependencies */
16
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
17
+ import * as util from "./speechflow-util"
18
+
19
/*  SpeechFlow node for Kitten text-to-speech conversion:
    consumes "text" chunks, synthesizes speech with Kitten TTS,
    resamples the result and emits "audio" chunks  */
export default class SpeechFlowNodeT2AKitten extends SpeechFlowNode {
    /*  declare official node name  */
    public static name = "t2a-kitten"

    /*  internal state  */
    private kitten:    KittenTTS      | null = null
    private resampler: SpeexResampler | null = null
    private closing = false

    /*  construct node  */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /*  declare node configuration parameters:
            model: name of the pretrained model handed to KittenTTS.from_pretrained()
            voice: one of the voice names accepted by the match expression
            speed: speed factor, restricted to the range 0.5 to 2.0  */
        this.configure({
            model: { type: "string", val: "KittenML/kitten-tts-nano-0.8", pos: 0, match: /^.+$/ },
            voice: { type: "string", val: "Bruno", pos: 1, match: /^(?:Bella|Jasper|Luna|Bruno|Rosie|Hugo|Kiki|Leo)$/ },
            speed: { type: "number", val: 1.25, pos: 2, match: (n: number) => n >= 0.5 && n <= 2.0 }
        })

        /*  declare node input/output format  */
        this.input  = "text"
        this.output = "audio"
    }

    /*  one-time status of node (nothing to report)  */
    async status () {
        return {}
    }

    /*  open node: instantiate Kitten TTS and the resampler,
        then create the transform stream which converts text chunks into audio chunks  */
    async open () {
        /*  clear destruction flag  */
        this.closing = false

        /*  establish Kitten TTS  */
        this.kitten = await KittenTTS.from_pretrained(this.params.model)
        if (this.kitten === null)
            throw new Error("failed to instantiate Kitten TTS")

        /*  establish resampler from Kitten's 24kHz
            output to our configured audio sample rate  */
        this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)

        /*  perform text-to-speech operation with Kitten TTS API
            (resolves to the resampled audio buffer, rejects on any failure)  */
        const text2speech = async (text: string) => {
            this.log("info", `Kitten TTS: input: "${text}"`)

            /*  guard against a close() which happened in the meantime
                (instead of failing with an opaque TypeError on a null reference)  */
            const kitten = this.kitten
            if (kitten === null)
                throw new Error("Kitten TTS already destroyed")
            const audio = await kitten.generate(text, {
                voice: this.params.voice,
                speed: this.params.speed
            })
            if (audio.sampling_rate !== 24000)
                throw new Error("expected 24KHz sampling rate in Kitten TTS output")

            /*  convert audio samples from PCM/F32/24kHz to PCM/I16/24kHz
                (clamp to [-1,1] and round, as writeInt16LE expects an integer value)  */
            const samples = audio.data
            const buffer1 = Buffer.alloc(samples.length * 2)
            for (let i = 0; i < samples.length; i++) {
                const sample = Math.max(-1, Math.min(1, samples[i]))
                buffer1.writeInt16LE(Math.round(sample * 0x7FFF), i * 2)
            }

            /*  resample audio samples from 24kHz to the configured audio sample rate  */
            if (this.resampler === null)
                throw new Error("resampler already destroyed")
            return this.resampler.processChunk(buffer1)
        }

        /*  create transform stream and connect it to the Kitten TTS API  */
        const self = this
        this.stream = new Stream.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                if (self.closing)
                    callback(new Error("stream already destroyed"))
                else if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else if (chunk.payload === "")
                    callback()
                else {
                    /*  bookkeeping to ensure the transform callback is invoked exactly once,
                        regardless of whether timeout, success or failure happens first  */
                    let callbackCalled = false
                    let processTimeout: ReturnType<typeof setTimeout> | null = null
                    const clearProcessTimeout = () => {
                        if (processTimeout !== null) {
                            clearTimeout(processTimeout)
                            processTimeout = null
                        }
                    }

                    /*  complete the transform operation: the flag is set only at the
                        moment the callback is really invoked, so an exception raised
                        while building the result still reaches the callback via the
                        rejection handler instead of silently stalling the stream  */
                    const finish = (error?: Error) => {
                        clearProcessTimeout()
                        if (callbackCalled)
                            return
                        callbackCalled = true
                        if (error !== undefined)
                            callback(error)
                        else
                            callback()
                    }

                    /*  give up after 60 seconds  */
                    processTimeout = setTimeout(() => {
                        processTimeout = null
                        finish(new Error("Kitten TTS timeout"))
                    }, 60 * 1000)

                    text2speech(chunk.payload).then((buffer) => {
                        clearProcessTimeout()

                        /*  result arrived after the timeout already failed the operation  */
                        if (callbackCalled)
                            return
                        if (self.closing) {
                            finish(new Error("stream destroyed during processing"))
                            return
                        }
                        self.log("info", `Kitten TTS: received audio (buffer length: ${buffer.byteLength})`)

                        /*  calculate actual audio duration from PCM buffer size  */
                        const durationMs = util.audioBufferDuration(buffer,
                            self.config.audioSampleRate, self.config.audioBitDepth) * 1000

                        /*  create new chunk with recalculated timestamps  */
                        const chunkNew = chunk.clone()
                        chunkNew.type         = "audio"
                        chunkNew.payload      = buffer
                        chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
                        this.push(chunkNew)
                        finish()
                    }).catch((error: unknown) => {
                        finish(util.ensureError(error, "Kitten TTS processing failed"))
                    })
                }
            },
            final (callback) {
                callback()
            }
        })
    }

    /*  close node: flag the shutdown and release stream, resampler and Kitten TTS  */
    async close () {
        /*  indicate closing  */
        this.closing = true

        /*  shutdown stream  */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }

        /*  destroy resampler  */
        if (this.resampler !== null) {
            this.resampler.destroy()
            this.resampler = null
        }

        /*  destroy Kitten TTS API  */
        if (this.kitten !== null) {
            await this.kitten.release()
            this.kitten = null
        }
    }
}
178
+
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** SpeechFlow - Speech Processing Flow Graph
3
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
3
+ ** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
4
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
5
  */
6
6
 
@@ -81,11 +81,12 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
81
81
  this.kokoro = await KokoroTTS.from_pretrained(model, {
82
82
  dtype: "q4f16",
83
83
  progress_callback: progressCallback
84
+ }).finally(() => {
85
+ if (interval !== null) {
86
+ clearInterval(interval)
87
+ interval = null
88
+ }
84
89
  })
85
- if (interval !== null) {
86
- clearInterval(interval)
87
- interval = null
88
- }
89
90
  if (this.kokoro === null)
90
91
  throw new Error("failed to instantiate Kokoro")
91
92
 
@@ -141,9 +142,13 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
141
142
  else if (chunk.payload === "")
142
143
  callback()
143
144
  else {
145
+ let callbackCalled = false
144
146
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
145
147
  processTimeout = null
146
- callback(new Error("Kokoro TTS timeout"))
148
+ if (!callbackCalled) {
149
+ callbackCalled = true
150
+ callback(new Error("Kokoro TTS timeout"))
151
+ }
147
152
  }, 60 * 1000)
148
153
  const clearProcessTimeout = () => {
149
154
  if (processTimeout !== null) {
@@ -152,8 +157,11 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
152
157
  }
153
158
  }
154
159
  text2speech(chunk.payload).then((buffer) => {
160
+ clearProcessTimeout()
161
+ if (callbackCalled)
162
+ return
163
+ callbackCalled = true
155
164
  if (self.closing) {
156
- clearProcessTimeout()
157
165
  callback(new Error("stream destroyed during processing"))
158
166
  return
159
167
  }
@@ -168,11 +176,13 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
168
176
  chunkNew.type = "audio"
169
177
  chunkNew.payload = buffer
170
178
  chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
171
- clearProcessTimeout()
172
179
  this.push(chunkNew)
173
180
  callback()
174
181
  }).catch((error: unknown) => {
175
182
  clearProcessTimeout()
183
+ if (callbackCalled)
184
+ return
185
+ callbackCalled = true
176
186
  callback(util.ensureError(error, "Kokoro processing failed"))
177
187
  })
178
188
  }
@@ -195,8 +205,10 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
195
205
  }
196
206
 
197
207
  /* destroy resampler */
198
- if (this.resampler !== null)
208
+ if (this.resampler !== null) {
209
+ this.resampler.destroy()
199
210
  this.resampler = null
211
+ }
200
212
 
201
213
  /* destroy Kokoro API */
202
214
  if (this.kokoro !== null)