speechflow 1.5.1 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +192 -171
  3. package/etc/claude.md +83 -46
  4. package/etc/speechflow.yaml +84 -84
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-api.d.ts +12 -0
  7. package/speechflow-cli/dst/speechflow-main-api.js +319 -0
  8. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -0
  9. package/speechflow-cli/dst/speechflow-main-cli.d.ts +28 -0
  10. package/speechflow-cli/dst/speechflow-main-cli.js +271 -0
  11. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -0
  12. package/speechflow-cli/dst/speechflow-main-config.d.ts +9 -0
  13. package/speechflow-cli/dst/speechflow-main-config.js +27 -0
  14. package/speechflow-cli/dst/speechflow-main-config.js.map +1 -0
  15. package/speechflow-cli/dst/speechflow-main-graph.d.ts +34 -0
  16. package/speechflow-cli/dst/speechflow-main-graph.js +367 -0
  17. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -0
  18. package/speechflow-cli/dst/speechflow-main-nodes.d.ts +10 -0
  19. package/speechflow-cli/dst/speechflow-main-nodes.js +60 -0
  20. package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -0
  21. package/speechflow-cli/dst/speechflow-main-status.d.ts +11 -0
  22. package/speechflow-cli/dst/speechflow-main-status.js +60 -0
  23. package/speechflow-cli/dst/speechflow-main-status.js.map +1 -0
  24. package/speechflow-cli/dst/speechflow-main.d.ts +7 -0
  25. package/speechflow-cli/dst/speechflow-main.js +127 -0
  26. package/speechflow-cli/dst/speechflow-main.js.map +1 -0
  27. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +4 -4
  28. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +1 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +8 -9
  31. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +5 -5
  33. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +1 -1
  35. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
  36. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  37. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.d.ts +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +8 -8
  39. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  40. package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +6 -6
  42. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +1 -1
  44. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +5 -5
  45. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +7 -7
  48. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
  50. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +5 -5
  51. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  52. package/speechflow-cli/dst/speechflow-node-a2a-mute.d.ts +1 -1
  53. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +3 -3
  54. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +1 -1
  56. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +7 -7
  57. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +1 -1
  59. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +7 -7
  60. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-a2a-vad.d.ts +1 -1
  62. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +7 -7
  63. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  64. package/speechflow-cli/dst/speechflow-node-a2a-wav.d.ts +1 -1
  65. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +3 -3
  66. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  67. package/speechflow-cli/dst/{speechflow-node-a2t-awstranscribe.d.ts → speechflow-node-a2t-amazon.d.ts} +1 -1
  68. package/speechflow-cli/dst/{speechflow-node-a2t-awstranscribe.js → speechflow-node-a2t-amazon.js} +11 -11
  69. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -0
  70. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +1 -1
  71. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +7 -7
  72. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  73. package/speechflow-cli/dst/{speechflow-node-a2t-openaitranscribe.d.ts → speechflow-node-a2t-openai.d.ts} +1 -1
  74. package/speechflow-cli/dst/{speechflow-node-a2t-openaitranscribe.js → speechflow-node-a2t-openai.js} +11 -11
  75. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -0
  76. package/speechflow-cli/dst/{speechflow-node-t2a-awspolly.d.ts → speechflow-node-t2a-amazon.d.ts} +1 -1
  77. package/speechflow-cli/dst/{speechflow-node-t2a-awspolly.js → speechflow-node-t2a-amazon.js} +9 -9
  78. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -0
  79. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  80. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -5
  81. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
  83. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +7 -7
  84. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  85. package/speechflow-cli/dst/{speechflow-node-t2t-awstranslate.d.ts → speechflow-node-t2t-amazon.d.ts} +1 -1
  86. package/speechflow-cli/dst/{speechflow-node-t2t-awstranslate.js → speechflow-node-t2t-amazon.js} +7 -7
  87. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -0
  88. package/speechflow-cli/dst/speechflow-node-t2t-deepl.d.ts +1 -1
  89. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +5 -5
  90. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  91. package/speechflow-cli/dst/speechflow-node-t2t-format.d.ts +1 -1
  92. package/speechflow-cli/dst/speechflow-node-t2t-format.js +3 -3
  93. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  94. package/speechflow-cli/dst/speechflow-node-t2t-google.d.ts +1 -1
  95. package/speechflow-cli/dst/speechflow-node-t2t-google.js +8 -8
  96. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  97. package/speechflow-cli/dst/{speechflow-node-a2a-dynamics.d.ts → speechflow-node-t2t-modify.d.ts} +1 -5
  98. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +111 -0
  99. package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -0
  100. package/speechflow-cli/dst/speechflow-node-t2t-ollama.d.ts +1 -1
  101. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +5 -5
  102. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  103. package/speechflow-cli/dst/speechflow-node-t2t-openai.d.ts +1 -1
  104. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +5 -5
  105. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  106. package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -1
  107. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +5 -5
  108. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  109. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.d.ts +1 -1
  110. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +5 -5
  111. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  112. package/speechflow-cli/dst/speechflow-node-t2t-transformers.d.ts +1 -1
  113. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +5 -5
  114. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
  115. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -1
  116. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +5 -5
  117. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  118. package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -1
  119. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +3 -3
  120. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  121. package/speechflow-cli/dst/speechflow-node-xio-device.d.ts +1 -1
  122. package/speechflow-cli/dst/speechflow-node-xio-device.js +8 -8
  123. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  124. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -1
  125. package/speechflow-cli/dst/speechflow-node-xio-file.js +50 -29
  126. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  127. package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -1
  128. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +7 -7
  129. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  130. package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -1
  131. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +10 -10
  132. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  133. package/speechflow-cli/dst/{speechflow-utils-audio-wt.js → speechflow-util-audio-wt.js} +1 -1
  134. package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -0
  135. package/speechflow-cli/dst/speechflow-util-audio.d.ts +22 -0
  136. package/speechflow-cli/dst/speechflow-util-audio.js +251 -0
  137. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -0
  138. package/speechflow-cli/dst/speechflow-util-error.d.ts +14 -0
  139. package/speechflow-cli/dst/speechflow-util-error.js +131 -0
  140. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -0
  141. package/speechflow-cli/dst/speechflow-util-queue.d.ts +68 -0
  142. package/speechflow-cli/dst/speechflow-util-queue.js +338 -0
  143. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -0
  144. package/speechflow-cli/dst/speechflow-util-stream.d.ts +18 -0
  145. package/speechflow-cli/dst/speechflow-util-stream.js +219 -0
  146. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -0
  147. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js +124 -0
  148. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js.map +1 -0
  149. package/speechflow-cli/dst/{speechflow-utils-audio.js → speechflow-util-webaudio.js} +2 -2
  150. package/speechflow-cli/dst/speechflow-util-webaudio.js.map +1 -0
  151. package/speechflow-cli/dst/speechflow-util.d.ts +4 -0
  152. package/speechflow-cli/dst/speechflow-util.js +26 -0
  153. package/speechflow-cli/dst/speechflow-util.js.map +1 -0
  154. package/speechflow-cli/dst/speechflow.js +3 -906
  155. package/speechflow-cli/dst/speechflow.js.map +1 -1
  156. package/speechflow-cli/etc/oxlint.jsonc +4 -1
  157. package/speechflow-cli/package.json +12 -11
  158. package/speechflow-cli/src/speechflow-main-api.ts +315 -0
  159. package/speechflow-cli/src/speechflow-main-cli.ts +259 -0
  160. package/speechflow-cli/src/speechflow-main-config.ts +17 -0
  161. package/speechflow-cli/src/speechflow-main-graph.ts +372 -0
  162. package/speechflow-cli/src/speechflow-main-nodes.ts +61 -0
  163. package/speechflow-cli/src/speechflow-main-status.ts +70 -0
  164. package/speechflow-cli/src/speechflow-main.ts +106 -0
  165. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -4
  166. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +7 -8
  167. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +5 -5
  168. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +7 -8
  169. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +7 -7
  170. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +6 -6
  171. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +4 -4
  172. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +6 -6
  173. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +4 -4
  174. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +2 -2
  175. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +6 -6
  176. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +6 -6
  177. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +6 -6
  178. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +2 -2
  179. package/speechflow-cli/src/{speechflow-node-a2t-awstranscribe.ts → speechflow-node-a2t-amazon.ts} +10 -10
  180. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +7 -7
  181. package/speechflow-cli/src/{speechflow-node-a2t-openaitranscribe.ts → speechflow-node-a2t-openai.ts} +10 -10
  182. package/speechflow-cli/src/{speechflow-node-t2a-awspolly.ts → speechflow-node-t2a-amazon.ts} +7 -7
  183. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +4 -4
  184. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +6 -6
  185. package/speechflow-cli/src/{speechflow-node-t2t-awstranslate.ts → speechflow-node-t2t-amazon.ts} +5 -5
  186. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +4 -4
  187. package/speechflow-cli/src/speechflow-node-t2t-format.ts +2 -2
  188. package/speechflow-cli/src/speechflow-node-t2t-google.ts +7 -7
  189. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +84 -0
  190. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +4 -4
  191. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +4 -4
  192. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +4 -4
  193. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +9 -9
  194. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +4 -4
  195. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -4
  196. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +2 -2
  197. package/speechflow-cli/src/speechflow-node-xio-device.ts +7 -7
  198. package/speechflow-cli/src/speechflow-node-xio-file.ts +49 -28
  199. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +7 -7
  200. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
  201. package/speechflow-cli/src/{speechflow-utils-audio.ts → speechflow-util-audio.ts} +131 -1
  202. package/speechflow-cli/src/speechflow-util-error.ts +184 -0
  203. package/speechflow-cli/src/speechflow-util-queue.ts +320 -0
  204. package/speechflow-cli/src/speechflow-util-stream.ts +197 -0
  205. package/speechflow-cli/src/speechflow-util.ts +10 -0
  206. package/speechflow-cli/src/speechflow.ts +3 -947
  207. package/speechflow-ui-db/package.json +3 -3
  208. package/speechflow-ui-st/dst/app-font-fa-brands-400.woff2 +0 -0
  209. package/speechflow-ui-st/dst/app-font-fa-regular-400.woff2 +0 -0
  210. package/speechflow-ui-st/dst/app-font-fa-solid-900.woff2 +0 -0
  211. package/speechflow-ui-st/dst/app-font-fa-v4compatibility.woff2 +0 -0
  212. package/speechflow-ui-st/dst/index.css +2 -2
  213. package/speechflow-ui-st/dst/index.js +32 -33
  214. package/speechflow-ui-st/package.json +4 -4
  215. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +0 -208
  216. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +0 -1
  217. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +0 -312
  218. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +0 -1
  219. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +0 -1
  220. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +0 -1
  221. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +0 -1
  222. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +0 -1
  223. package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +0 -1
  224. package/speechflow-cli/dst/speechflow-utils-audio.js.map +0 -1
  225. package/speechflow-cli/dst/speechflow-utils.d.ts +0 -108
  226. package/speechflow-cli/dst/speechflow-utils.js +0 -740
  227. package/speechflow-cli/dst/speechflow-utils.js.map +0 -1
  228. package/speechflow-cli/src/speechflow-utils.ts +0 -804
  229. /package/speechflow-cli/dst/{speechflow-node-a2a-dynamics-wt.d.ts → speechflow-util-audio-wt.d.ts} +0 -0
  230. /package/speechflow-cli/dst/{speechflow-utils-audio-wt.d.ts → speechflow-util-webaudio-wt.d.ts} +0 -0
  231. /package/speechflow-cli/dst/{speechflow-utils-audio.d.ts → speechflow-util-webaudio.d.ts} +0 -0
  232. /package/speechflow-cli/src/{speechflow-utils-audio-wt.ts → speechflow-util-audio-wt.ts} +0 -0
package/README.md CHANGED
@@ -52,6 +52,7 @@ local [OPUS/ONNX](https://github.com/Helsinki-NLP/Opus-MT) text-to-text translat
52
52
  local [FFmpeg](https://ffmpeg.org/) speech-to-speech encoding,
53
53
  local WAV speech-to-speech encoding,
54
54
  local text-to-text formatting,
55
+ local text-to-text regex-based modification,
55
56
  local text-to-text sentence merging/splitting,
56
57
  local text-to-text subtitle generation,
57
58
  local text or audio filter, and
@@ -75,18 +76,18 @@ and real-time translated to English.
75
76
  First, the used configuration was a straight linear pipeline in file `sample.conf`:
76
77
 
77
78
  ```txt
78
- device(device: "coreaudio:Elgato Wave:3", mode: "r") |
79
- meter(interval: 50, dashboard: "meter1") |
80
- deepgram(language: "de", model: "nova-2", interim: true) |
81
- trace(type: "text", dashboard: "text1") |
82
- filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
83
- sentence() |
84
- trace(type: "text", dashboard: "text2") |
85
- deepl(src: "de", dst: "en") |
86
- trace(type: "text", dashboard: "text3") |
87
- elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
88
- meter(interval: 50, dashboard: "meter2") |
89
- device(device: "coreaudio:USBAudio2.0", mode: "w")
79
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") |
80
+ a2a-meter(interval: 50, dashboard: "meter1") |
81
+ a2t-deepgram(language: "de", model: "nova-2", interim: true) |
82
+ x2x-trace(type: "text", dashboard: "text1") |
83
+ x2x-filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
84
+ t2t-sentence() |
85
+ x2x-trace(type: "text", dashboard: "text2") |
86
+ t2t-deepl(src: "de", dst: "en") |
87
+ x2x-trace(type: "text", dashboard: "text3") |
88
+ t2a-elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
89
+ a2a-meter(interval: 50, dashboard: "meter2") |
90
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
90
91
  ```
91
92
 
92
93
  Second, the corresponding **SpeechFlow** command was:
@@ -100,13 +101,13 @@ Finally, the resulting dashboard under URL `http://127.0.0.1:8484/` was:
100
101
 
101
102
  ![dashboard](etc/speechflow.png)
102
103
 
103
- On the left you can see the volume meter of the microphone (`device`),
104
+ On the left you can see the volume meter of the microphone (`xio-device`),
104
105
  followed by the German result of the speech-to-text conversion
105
- (`deepgram`), followed by the still German results of the text-to-text
106
- sentence splitting/aggregation (`sentence`), followed by the English
107
- results of the text-to-text translation (`deepl`) and then finally on
106
+ (`a2t-deepgram`), followed by the still German results of the text-to-text
107
+ sentence splitting/aggregation (`t2t-sentence`), followed by the English
108
+ results of the text-to-text translation (`t2t-deepl`) and then finally on
108
109
  the right you can see the volume meter of the text-to-speech conversion
109
- (`elevenlabs`).
110
+ (`t2a-elevenlabs`).
110
111
 
111
112
  The entire **SpeechFlow** processing pipeline runs in real-time and
112
113
  the latency between input and output audio is about 2-3 seconds, very
@@ -188,92 +189,92 @@ They can also be found in the sample [speechflow.yaml](./etc/speechflow.yaml) fi
188
189
  - **Capturing**: Capture audio from microphone device into WAV audio file:
189
190
 
190
191
  ```
191
- device(device: "wasapi:VoiceMeeter Out B1", mode: "r") |
192
- wav(mode: "encode") |
193
- file(path: "capture.wav", mode: "w", type: "audio")
192
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") |
193
+ a2a-wav(mode: "encode") |
194
+ xio-file(path: "capture.wav", mode: "w", type: "audio")
194
195
  ```
195
196
 
196
197
  - **Pass-Through**: Pass-through audio from microphone device to speaker
197
198
  device and in parallel record it to WAV audio file:
198
199
 
199
200
  ```
200
- device(device: "wasapi:VoiceMeeter Out B1", mode: "r") | {
201
- wav(mode: "encode") |
202
- file(path: "capture.wav", mode: "w", type: "audio"),
203
- device(device: "wasapi:VoiceMeeter VAIO3 Input", mode: "w")
201
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") | {
202
+ a2a-wav(mode: "encode") |
203
+ xio-file(path: "capture.wav", mode: "w", type: "audio"),
204
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
204
205
  }
205
206
  ```
206
207
 
207
208
  - **Transcription**: Generate text file with German transcription of MP3 audio file:
208
209
 
209
210
  ```
210
- file(path: argv.0, mode: "r", type: "audio") |
211
- ffmpeg(src: "mp3", dst: "pcm") |
212
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
213
- format(width: 80) |
214
- file(path: argv.1, mode: "w", type: "text")
211
+ xio-file(path: argv.0, mode: "r", type: "audio") |
212
+ a2a-ffmpeg(src: "mp3", dst: "pcm") |
213
+ a2t-deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
214
+ t2t-format(width: 80) |
215
+ xio-file(path: argv.1, mode: "w", type: "text")
215
216
  ```
216
217
 
217
218
  - **Subtitling**: Generate text file with German subtitles of MP3 audio file:
218
219
 
219
220
  ```
220
- file(path: argv.0, mode: "r", type: "audio") |
221
- ffmpeg(src: "mp3", dst: "pcm") |
222
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
223
- subtitle(format: "vtt") |
224
- file(path: argv.1, mode: "w", type: "text")
221
+ xio-file(path: argv.0, mode: "r", type: "audio") |
222
+ a2a-ffmpeg(src: "mp3", dst: "pcm") |
223
+ a2t-deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
224
+ t2t-subtitle(format: "vtt") |
225
+ xio-file(path: argv.1, mode: "w", type: "text")
225
226
  ```
226
227
 
227
228
  - **Speaking**: Generate audio file with English voice for a text file:
228
229
 
229
230
  ```
230
- file(path: argv.0, mode: "r", type: "text") |
231
- kokoro(language: "en") |
232
- wav(mode: "encode") |
233
- file(path: argv.1, mode: "w", type: "audio")
231
+ xio-file(path: argv.0, mode: "r", type: "text") |
232
+ t2a-kokoro(language: "en") |
233
+ a2a-wav(mode: "encode") |
234
+ xio-file(path: argv.1, mode: "w", type: "audio")
234
235
  ```
235
236
 
236
237
  - **Ad-Hoc Translation**: Ad-Hoc text translation from German to English
237
238
  via stdin/stdout:
238
239
 
239
240
  ```
240
- file(path: "-", mode: "r", type: "text") |
241
- deepl(src: "de", dst: "en") |
242
- file(path: "-", mode: "w", type: "text")
241
+ xio-file(path: "-", mode: "r", type: "text") |
242
+ t2t-deepl(src: "de", dst: "en") |
243
+ xio-file(path: "-", mode: "w", type: "text")
243
244
  ```
244
245
 
245
246
  - **Studio Translation**: Real-time studio translation from German to English,
246
247
  including the capturing of all involved inputs and outputs:
247
248
 
248
249
  ```
249
- device(device: "coreaudio:Elgato Wave:3", mode: "r") | {
250
- gender() | {
251
- meter(interval: 250) |
252
- wav(mode: "encode") |
253
- file(path: "program-de.wav", mode: "w", type: "audio"),
254
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) | {
255
- sentence() | {
256
- format(width: 80) |
257
- file(path: "program-de.txt", mode: "w", type: "text"),
258
- deepl(src: "de", dst: "en", key: env.SPEECHFLOW_DEEPL_KEY) | {
259
- trace(name: "text", type: "text") | {
260
- format(width: 80) |
261
- file(path: "program-en.txt", mode: "w", type: "text"),
262
- subtitle(format: "srt") |
263
- file(path: "program-en.srt", mode: "w", type: "text"),
264
- mqtt(url: "mqtt://10.1.0.10:1883",
250
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") | {
251
+ a2a-gender() | {
252
+ a2a-meter(interval: 250) |
253
+ a2a-wav(mode: "encode") |
254
+ xio-file(path: "program-de.wav", mode: "w", type: "audio"),
255
+ a2t-deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) | {
256
+ t2t-sentence() | {
257
+ t2t-format(width: 80) |
258
+ xio-file(path: "program-de.txt", mode: "w", type: "text"),
259
+ t2t-deepl(src: "de", dst: "en", key: env.SPEECHFLOW_DEEPL_KEY) | {
260
+ x2x-trace(name: "text", type: "text") | {
261
+ t2t-format(width: 80) |
262
+ xio-file(path: "program-en.txt", mode: "w", type: "text"),
263
+ t2t-subtitle(format: "srt") |
264
+ xio-file(path: "program-en.srt", mode: "w", type: "text"),
265
+ xio-mqtt(url: "mqtt://10.1.0.10:1883",
265
266
  username: env.SPEECHFLOW_MQTT_USER,
266
267
  password: env.SPEECHFLOW_MQTT_PASS,
267
268
  topicWrite: "stream/studio/sender"),
268
269
  {
269
- filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
270
- elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
271
- filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
272
- elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
270
+ x2x-filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
271
+ t2a-elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
272
+ x2x-filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
273
+ t2a-elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
273
274
  } | {
274
- wav(mode: "encode") |
275
- file(path: "program-en.wav", mode: "w", type: "audio"),
276
- device(device: "coreaudio:USBAudio2.0", mode: "w")
275
+ a2a-wav(mode: "encode") |
276
+ xio-file(path: "program-en.wav", mode: "w", type: "audio"),
277
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
277
278
  }
278
279
  }
279
280
  }
@@ -289,52 +290,54 @@ Processing Node Types
289
290
  First a short overview of the available processing nodes:
290
291
 
291
292
  - Input/Output nodes:
292
- **file**,
293
- **device**,
294
- **websocket**,
295
- **mqtt**.
293
+ **xio-file**,
294
+ **xio-device**,
295
+ **xio-websocket**,
296
+ **xio-mqtt**.
296
297
  - Audio-to-Audio nodes:
297
- **ffmpeg**,
298
- **wav**,
299
- **mute**,
300
- **meter**,
301
- **vad**,
302
- **gender**,
303
- **speex**,
304
- **rrnoise**,
305
- **compressor**,
306
- **expander**,
307
- **gain**,
308
- **filler**.
298
+ **a2a-ffmpeg**,
299
+ **a2a-wav**,
300
+ **a2a-mute**,
301
+ **a2a-meter**,
302
+ **a2a-vad**,
303
+ **a2a-gender**,
304
+ **a2a-speex**,
305
+ **a2a-rnnoise**,
306
+ **a2a-compressor**,
307
+ **a2a-expander**,
308
+ **a2a-gain**,
309
+ **a2a-filler**.
309
310
  - Audio-to-Text nodes:
310
- **openaitranscribe**,
311
- **awstranscribe**,
312
- **deepgram**.
311
+ **a2t-openai**,
312
+ **a2t-amazon**,
313
+ **a2t-deepgram**.
313
314
  - Text-to-Text nodes:
314
- **deepl**,
315
- **awstranslate**,
316
- **openai**,
317
- **ollama**,
318
- **transformers**,
319
- **google**,
320
- **subtitle**,
321
- **format**.
315
+ **t2t-deepl**,
316
+ **t2t-amazon**,
317
+ **t2t-openai**,
318
+ **t2t-ollama**,
319
+ **t2t-transformers**,
320
+ **t2t-google**,
321
+ **t2t-modify**,
322
+ **t2t-subtitle**,
323
+ **t2t-format**,
324
+ **t2t-sentence**.
322
325
  - Text-to-Audio nodes:
323
- **awspolly**.
324
- **elevenlabs**.
325
- **kokoro**.
326
+ **t2a-amazon**,
327
+ **t2a-elevenlabs**,
328
+ **t2a-kokoro**.
326
329
  - Any-to-Any nodes:
327
- **filter**,
328
- **trace**.
330
+ **x2x-filter**,
331
+ **x2x-trace**.
329
332
 
330
333
  ### Input/Output Nodes
331
334
 
332
335
  The following nodes are for external I/O, i.e., to read/write from
333
336
  external files, devices and network services.
334
337
 
335
- - Node: **file**<br/>
338
+ - Node: **xio-file**<br/>
336
339
  Purpose: **File and StdIO source/sink**<br/>
337
- Example: `file(path: "capture.pcm", mode: "w", type: "audio")`
340
+ Example: `xio-file(path: "capture.pcm", mode: "w", type: "audio")`
338
341
 
339
342
  > This node allows the reading/writing from/to files or from StdIO. It
340
343
  > is intended to be used as source and sink nodes in batch processing,
@@ -353,9 +356,9 @@ external files, devices and network services.
353
356
  | **chunka** | | 200 | `10 <= n <= 1000` |
354
357
  | **chunkt** | | 65536 | `1024 <= n <= 131072` |
355
358
 
356
- - Node: **device**<br/>
359
+ - Node: **xio-device**<br/>
357
360
  Purpose: **Microphone/speaker device source/sink**<br/>
358
- Example: `device(device: "wasapi:VoiceMeeter Out B1", mode: "r")`
361
+ Example: `xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r")`
359
362
 
360
363
  > This node allows the reading/writing from/to audio devices. It is
361
364
  > intended to be used as source nodes for microphone devices and as
@@ -372,9 +375,9 @@ external files, devices and network services.
372
375
  | **mode** | 1 | "rw" | `/^(?:r\|w\|rw)$/` |
373
376
  | **chunk** | 2 | 200 | `10 <= n <= 1000` |
374
377
 
375
- - Node: **websocket**<br/>
378
+ - Node: **xio-websocket**<br/>
376
379
  Purpose: **WebSocket source/sink**<br/>
377
- Example: `websocket(connect: "ws://127.0.0.1:12345", type: "text")`
380
+ Example: `xio-websocket(connect: "ws://127.0.0.1:12345", type: "text")`
378
381
  Notice: this node requires a peer WebSocket service!
379
382
 
380
383
  > This node allows reading/writing from/to WebSocket network services.
@@ -393,9 +396,9 @@ external files, devices and network services.
393
396
  | **connect** | *none* | *none* | `/^(?:\|ws:\/\/(.+?):(\d+)(?:\/.*)?)$/` |
394
397
  | **type** | *none* | "audio" | `/^(?:audio\|text)$/` |
395
398
 
396
- - Node: **mqtt**<br/>
399
+ - Node: **xio-mqtt**<br/>
397
400
  Purpose: **MQTT sink**<br/>
398
- Example: `mqtt(url: "mqtt://127.0.0.1:1883", username: "foo", password: "bar", topic: "quux")`
401
+ Example: `xio-mqtt(url: "mqtt://127.0.0.1:1883", username: "foo", password: "bar", topic: "quux")`
399
402
  Notice: this node requires a peer MQTT broker!
400
403
 
401
404
  > This node allows reading/writing from/to MQTT broker topics. It is
@@ -418,9 +421,9 @@ external files, devices and network services.
418
421
 
419
422
  The following nodes process audio chunks only.
420
423
 
421
- - Node: **ffmpeg**<br/>
424
+ - Node: **a2a-ffmpeg**<br/>
422
425
  Purpose: **FFmpeg audio format conversion**<br/>
423
- Example: `ffmpeg(src: "pcm", dst: "mp3")`
426
+ Example: `a2a-ffmpeg(src: "pcm", dst: "mp3")`
424
427
 
425
428
  > This node allows converting between audio formats. It is primarily
426
429
  > intended to support the reading/writing of external MP3 and Opus
@@ -436,9 +439,9 @@ The following nodes process audio chunks only.
436
439
  | **src** | 0 | "pcm" | `/^(?:pcm\|wav\|mp3\|opus)$/` |
437
440
  | **dst** | 1 | "wav" | `/^(?:pcm\|wav\|mp3\|opus)$/` |
438
441
 
439
- - Node: **wav**<br/>
442
+ - Node: **a2a-wav**<br/>
440
443
  Purpose: **WAV audio format conversion**<br/>
441
- Example: `wav(mode: "encode")`
444
+ Example: `a2a-wav(mode: "encode")`
442
445
 
443
446
  > This node allows converting between PCM and WAV audio formats. It is
444
447
  > primarily intended to support the reading/writing of external WAV
@@ -453,9 +456,9 @@ The following nodes process audio chunks only.
453
456
  | ----------- | --------- | -------- | ------------------------ |
454
457
  | **mode** | 0 | "encode" | `/^(?:encode\|decode)$/` |
455
458
 
456
- - Node: **mute**<br/>
459
+ - Node: **a2a-mute**<br/>
457
460
  Purpose: **volume muting node**<br/>
458
- Example: `mute()`
461
+ Example: `a2a-mute()`
459
462
  Notice: this node has to be externally controlled via REST/WebSockets!
460
463
 
461
464
  > This node allows muting the audio stream by either silencing or even
@@ -469,9 +472,9 @@ The following nodes process audio chunks only.
469
472
  | Parameter | Position | Default | Requirement |
470
473
  | ----------- | --------- | -------- | ------------------------ |
471
474
 
472
- - Node: **meter**<br/>
475
+ - Node: **a2a-meter**<br/>
473
476
  Purpose: **Loudness metering node**<br/>
474
- Example: `meter(250)`
477
+ Example: `a2a-meter(250)`
475
478
 
476
479
  > This node allows measuring the loudness of the audio stream. The
477
480
  > results are emitted to both the logfile of **SpeechFlow** and the
@@ -486,9 +489,9 @@ The following nodes process audio chunks only.
486
489
  | ----------- | --------- | -------- | ------------------------ |
487
490
  | **interval** | 0 | 250 | *none* |
488
491
 
489
- - Node: **vad**<br/>
492
+ - Node: **a2a-vad**<br/>
490
493
  Purpose: **Voice Audio Detection (VAD) node**<br/>
491
- Example: `vad()`
494
+ Example: `a2a-vad()`
492
495
 
493
496
  > This node performs Voice Audio Detection (VAD), i.e., it detects
494
497
  > voice in the audio stream and if not detected either silences or
@@ -509,9 +512,9 @@ The following nodes process audio chunks only.
509
512
  | **preSpeechPadFrames** | *none* | 1 | *none* |
510
513
  | **postSpeechTail** | *none* | 1500 | *none* |
511
514
 
512
- - Node: **gender**<br/>
515
+ - Node: **a2a-gender**<br/>
513
516
  Purpose: **Gender Detection node**<br/>
514
- Example: `gender()`
517
+ Example: `a2a-gender()`
515
518
 
516
519
  > This node performs gender detection on the audio stream. It
517
520
  > annotates the audio chunks with `gender=male` or `gender=female`
@@ -526,9 +529,9 @@ The following nodes process audio chunks only.
526
529
  | ----------- | --------- | -------- | ------------------------ |
527
530
  | **window** | 0 | 500 | *none* |
528
531
 
529
- - Node: **speex**<br/>
532
+ - Node: **a2a-speex**<br/>
530
533
  Purpose: **Speex Noise Suppression node**<br/>
531
- Example: `speex(attentuate: -18)`
534
+ Example: `a2a-speex(attentuate: -18)`
532
535
 
533
536
  > This node uses the Speex DSP pre-processor to perform noise
534
537
  > suppression, i.e., it detects and attenuates (by a certain level of
@@ -543,9 +546,9 @@ The following nodes process audio chunks only.
543
546
  | ----------- | --------- | -------- | ------------------------ |
544
547
  | **attentuate** | 0 | -18 | `-60 <= n <= 0` |
545
548
 
546
- - Node: **rnnoise**<br/>
549
+ - Node: **a2a-rnnoise**<br/>
547
550
  Purpose: **RNNoise Noise Suppression node**<br/>
548
- Example: `rnnoise()`
551
+ Example: `a2a-rnnoise()`
549
552
 
550
553
  > This node uses RNNoise to perform noise suppression, i.e., it
551
554
  > detects and attenuates the noise in the audio stream.
@@ -558,9 +561,9 @@ The following nodes process audio chunks only.
558
561
  | Parameter | Position | Default | Requirement |
559
562
  | ----------- | --------- | -------- | ------------------------ |
560
563
 
561
- - Node: **compressor**<br/>
564
+ - Node: **a2a-compressor**<br/>
562
565
  Purpose: **audio compressor node**<br/>
563
- Example: `compressor(thresholdDb: -18)`
566
+ Example: `a2a-compressor(thresholdDb: -18)`
564
567
 
565
568
  > This node applies a dynamics compressor, i.e., it attenuates the
566
569
  > volume by a certain ratio whenever the volume is above the threshold.
@@ -579,9 +582,9 @@ The following nodes process audio chunks only.
579
582
  | **kneeDb** | *none* | 6 | `n >= 0 && n <= 100` |
580
583
  | **makeupDb** | *none* | 0 | `n >= 0 && n <= 100` |
581
584
 
582
- - Node: **expander**<br/>
585
+ - Node: **a2a-expander**<br/>
583
586
  Purpose: **audio expander node**<br/>
584
- Example: `expander(thresholdDb: -46)`
587
+ Example: `a2a-expander(thresholdDb: -46)`
585
588
 
586
589
  > This node applies a dynamics expander, i.e., it attenuates the
587
590
  > volume by a certain ratio whenever the volume is below the threshold.
@@ -600,9 +603,9 @@ The following nodes process audio chunks only.
600
603
  | **kneeDb** | *none* | 6 | `n >= 0 && n <= 100` |
601
604
  | **makeupDb** | *none* | 0 | `n >= 0 && n <= 100` |
602
605
 
603
- - Node: **gain**<br/>
606
+ - Node: **a2a-gain**<br/>
604
607
  Purpose: **audio gain adjustment node**<br/>
605
- Example: `gain(db: 12)`
608
+ Example: `a2a-gain(db: 12)`
606
609
 
607
610
  > This node applies a gain adjustment to audio, i.e., it increases or
608
611
  > decreases the volume by certain decibels
@@ -616,9 +619,9 @@ The following nodes process audio chunks only.
616
619
  | ----------- | --------- | -------- | ------------------------ |
617
620
  | **db** | *none* | 12 | `n >= -60 && n <= 60` |
618
621
 
619
- - Node: **filler**<br/>
622
+ - Node: **a2a-filler**<br/>
620
623
  Purpose: **audio filler node**<br/>
621
- Example: `filler()`
624
+ Example: `a2a-filler()`
622
625
 
623
626
  > This node adds missing audio frames of silence in order to fill
624
627
  > the chronological gaps between generated audio frames (from
@@ -636,9 +639,9 @@ The following nodes process audio chunks only.
636
639
 
637
640
  The following nodes convert audio to text chunks.
638
641
 
639
- - Node: **openaitranscribe**<br/>
642
+ - Node: **a2t-openai**<br/>
640
643
  Purpose: **OpenAI/GPT Speech-to-Text conversion**<br/>
641
- Example: `openaitranscribe(language: "de")`<br/>
644
+ Example: `a2t-openai(language: "de")`<br/>
642
645
  Notice: this node requires an OpenAI API key!
643
646
 
644
647
  > This node uses OpenAI GPT to perform Speech-to-Text (S2T)
@@ -658,9 +661,9 @@ The following nodes convert audio to text chunks.
658
661
  | **language** | *none* | "en" | `/^(?:de\|en)$/` |
659
662
  | **interim** | *none* | false | *none* |
660
663
 
661
- - Node: **awstranscribe**<br/>
664
+ - Node: **a2t-amazon**<br/>
662
665
  Purpose: **Amazon Transcribe Speech-to-Text conversion**<br/>
663
- Example: `awstranscribe(language: "de")`<br/>
666
+ Example: `a2t-amazon(language: "de")`<br/>
664
667
  Notice: this node requires an API key!
665
668
 
666
669
  > This node uses Amazon Transcribe to perform Speech-to-Text (S2T)
@@ -680,9 +683,9 @@ The following nodes convert audio to text chunks.
680
683
  | **language** | *none* | "en" | `/^(?:en\|de)$/` |
681
684
  | **interim** | *none* | false | *none* |
682
685
 
683
- - Node: **deepgram**<br/>
686
+ - Node: **a2t-deepgram**<br/>
684
687
  Purpose: **Deepgram Speech-to-Text conversion**<br/>
685
- Example: `deepgram(language: "de")`<br/>
688
+ Example: `a2t-deepgram(language: "de")`<br/>
686
689
  Notice: this node requires an API key!
687
690
 
688
691
  > This node performs Speech-to-Text (S2T) conversion, i.e., it
@@ -706,9 +709,9 @@ The following nodes convert audio to text chunks.
706
709
 
707
710
  The following nodes process text chunks only.
708
711
 
709
- - Node: **deepl**<br/>
712
+ - Node: **t2t-deepl**<br/>
710
713
  Purpose: **DeepL Text-to-Text translation**<br/>
711
- Example: `deepl(src: "de", dst: "en")`<br/>
714
+ Example: `t2t-deepl(src: "de", dst: "en")`<br/>
712
715
  Notice: this node requires an API key!
713
716
 
714
717
  > This node performs translation between English and German languages.
@@ -724,9 +727,9 @@ The following nodes process text chunks only.
724
727
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
725
728
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
726
729
 
727
- - Node: **awstranslate**<br/>
730
+ - Node: **t2t-amazon**<br/>
728
731
  Purpose: **AWS Translate Text-to-Text translation**<br/>
729
- Example: `awstranslate(src: "de", dst: "en")`<br/>
732
+ Example: `t2t-amazon(src: "de", dst: "en")`<br/>
730
733
  Notice: this node requires an API key!
731
734
 
732
735
  > This node performs translation between English and German languages.
@@ -744,9 +747,9 @@ The following nodes process text chunks only.
744
747
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
745
748
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
746
749
 
747
- - Node: **openai**<br/>
750
+ - Node: **t2t-openai**<br/>
748
751
  Purpose: **OpenAI/GPT Text-to-Text translation and spelling correction**<br/>
749
- Example: `openai(src: "de", dst: "en")`<br/>
752
+ Example: `t2t-openai(src: "de", dst: "en")`<br/>
750
753
  Notice: this node requires an OpenAI API key!
751
754
 
752
755
  > This node performs translation between English and German languages
@@ -766,11 +769,11 @@ The following nodes process text chunks only.
766
769
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
767
770
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
768
771
  | **key** | *none* | env.SPEECHFLOW\_OPENAI\_KEY | *none* |
769
- | **model** | *none* | "gpt-4o-mini" | *none* |
772
+ | **model** | *none* | "gpt-5-mini" | *none* |
770
773
 
771
- - Node: **ollama**<br/>
774
+ - Node: **t2t-ollama**<br/>
772
775
  Purpose: **Ollama/Gemma Text-to-Text translation and spelling correction**<br/>
773
- Example: `ollama(src: "de", dst: "en")`<br/>
776
+ Example: `t2t-ollama(src: "de", dst: "en")`<br/>
774
777
  Notice: this node requires Ollama to be installed!
775
778
 
776
779
  > This node performs translation between English and German languages
@@ -791,9 +794,9 @@ The following nodes process text chunks only.
791
794
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
792
795
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
793
796
 
794
- - Node: **transformers**<br/>
797
+ - Node: **t2t-transformers**<br/>
795
798
  Purpose: **Transformers Text-to-Text translation**<br/>
796
- Example: `transformers(src: "de", dst: "en")`<br/>
799
+ Example: `t2t-transformers(src: "de", dst: "en")`<br/>
797
800
 
798
801
  > This node performs translation between English and German languages
799
802
  > in the text stream. It is based on local OPUS or SmolLM3 LLMs.
@@ -809,9 +812,9 @@ The following nodes process text chunks only.
809
812
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
810
813
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
811
814
 
812
- - Node: **google**<br/>
815
+ - Node: **t2t-google**<br/>
813
816
  Purpose: **Google Cloud Translate Text-to-Text translation**<br/>
814
- Example: `google(src: "de", dst: "en")`<br/>
817
+ Example: `t2t-google(src: "de", dst: "en")`<br/>
815
818
  Notice: this node requires a Google Cloud API key and project ID!
816
819
 
817
820
  > This node performs translation between multiple languages
@@ -829,13 +832,31 @@ The following nodes process text chunks only.
829
832
  | **src** | 0 | "de" | `/^(?:de\|en\|fr\|it)$/` |
830
833
  | **dst** | 1 | "en" | `/^(?:de\|en\|fr\|it)$/` |
831
834
 
832
- - Node: **sentence**<br/>
835
+ - Node: **t2t-modify**<br/>
836
+ Purpose: **regex-based text modification**<br/>
837
+ Example: `t2t-modify(match: "\\b(hello)\\b", replace: "hi $1")`<br/>
838
+
839
+ > This node allows regex-based modification of text chunks using pattern
840
+ > matching and replacement with support for $n backreferences. It is
841
+ > primarily intended for text preprocessing, cleanup, or transformation tasks.
842
+
843
+ | Port | Payload |
844
+ | ------- | ----------- |
845
+ | input | text |
846
+ | output | text |
847
+
848
+ | Parameter | Position | Default | Requirement |
849
+ | ------------ | --------- | -------- | ------------------ |
850
+ | **match** | 0 | "" | *required* |
851
+ | **replace** | 1 | "" | *required* |
852
+
853
+ - Node: **t2t-sentence**<br/>
833
854
  Purpose: **sentence splitting/merging**<br/>
834
- Example: `sentence()`<br/>
855
+ Example: `t2t-sentence()`<br/>
835
856
 
836
857
  > This node allows you to ensure that a text stream is split or merged
837
858
  > into complete sentences. It is primarily intended to be used after
838
- > the "deepgram" node and before "deepl" or "elevenlabs" nodes in
859
+ > the "a2t-deepgram" node and before "t2t-deepl" or "t2a-elevenlabs" nodes in
839
860
  > order to improve overall quality.
840
861
 
841
862
  | Port | Payload |
@@ -846,9 +867,9 @@ The following nodes process text chunks only.
846
867
  | Parameter | Position | Default | Requirement |
847
868
  | ------------ | --------- | -------- | ------------------ |
848
869
 
849
- - Node: **subtitle**<br/>
870
+ - Node: **t2t-subtitle**<br/>
850
871
  Purpose: **SRT/VTT Subtitle Generation**<br/>
851
- Example: `subtitle(format: "srt")`<br/>
872
+ Example: `t2t-subtitle(format: "srt")`<br/>
852
873
 
853
874
  > This node generates subtitles from the text stream (and its embedded
854
875
  > timestamps) in the formats SRT (SubRip) or VTT (WebVTT).
@@ -863,9 +884,9 @@ The following nodes process text chunks only.
863
884
  | **format** | *none* | "srt" | `/^(?:srt\|vtt)$/` |
864
885
  | **words** | *none* | false | *none* |
865
886
 
866
- - Node: **format**<br/>
887
+ - Node: **t2t-format**<br/>
867
888
  Purpose: **text paragraph formatting**<br/>
868
- Example: `format(width: 80)`<br/>
889
+ Example: `t2t-format(width: 80)`<br/>
869
890
 
870
891
  > This node formats the text stream into lines no longer than a
871
892
  > certain width. It is primarily intended for use before writing text
@@ -884,9 +905,9 @@ The following nodes process text chunks only.
884
905
 
885
906
  The following nodes convert text chunks to audio chunks.
886
907
 
887
- - Node: **awspolly**<br/>
908
+ - Node: **t2a-amazon**<br/>
888
909
  Purpose: **Amazon Polly Text-to-Speech conversion**<br/>
889
- Example: `awspolly(language: "en", voice: "Danielle)`<br/>
910
+ Example: `t2a-amazon(language: "en", voice: "Danielle")`<br/>
890
911
  Notice: this node requires an Amazon API key!
891
912
 
892
913
  > This node uses Amazon Polly to perform Text-to-Speech (T2S)
@@ -906,9 +927,9 @@ The following nodes convert text chunks to audio chunks.
906
927
  | **voice** | 0 | "Amy" | `/^(?:Amy\|Danielle\|Joanna\|Matthew\|Ruth\|Stephen\|Viki\|Daniel)$/` |
907
928
  | **language** | 1 | "en" | `/^(?:de\|en)$/` |
908
929
 
909
- - Node: **elevenlabs**<br/>
930
+ - Node: **t2a-elevenlabs**<br/>
910
931
  Purpose: **ElevenLabs Text-to-Speech conversion**<br/>
911
- Example: `elevenlabs(language: "en")`<br/>
932
+ Example: `t2a-elevenlabs(language: "en")`<br/>
912
933
  Notice: this node requires an ElevenLabs API key!
913
934
 
914
935
  > This node uses ElevenLabs to perform Text-to-Speech (T2S)
@@ -930,9 +951,9 @@ The following nodes convert text chunks to audio chunks.
930
951
  | **similarity** | 4 | 0.75 | `n >= 0.0 && n <= 1.0` |
931
952
  | **optimize** | 5 | "latency" | `/^(?:latency\|quality)$/` |
932
953
 
933
- - Node: **kokoro**<br/>
954
+ - Node: **t2a-kokoro**<br/>
934
955
  Purpose: **Kokoro Text-to-Speech conversion**<br/>
935
- Example: `kokoro(language: "en")`<br/>
956
+ Example: `t2a-kokoro(language: "en")`<br/>
936
957
  Notice: this currently supports the English language only!
937
958
 
938
959
  > This node uses Kokoro to perform Text-to-Speech (T2S) conversion,
@@ -954,12 +975,12 @@ The following nodes convert text chunks to audio chunks.
954
975
 
955
976
  The following nodes process any type of chunk, i.e., both audio and text chunks.
956
977
 
957
- - Node: **filter**<br/>
978
+ - Node: **x2x-filter**<br/>
958
979
  Purpose: **meta information based filter**<br/>
959
- Example: `filter(type: "audio", var: "meta:gender", op: "==", val: "male")`<br/>
980
+ Example: `x2x-filter(type: "audio", var: "meta:gender", op: "==", val: "male")`<br/>
960
981
 
961
982
  > This node allows you to filter nodes based on certain criteria. It
962
- > is primarily intended to be used in conjunction with the "gender"
983
+ > is primarily intended to be used in conjunction with the "a2a-gender"
963
984
  > node and in front of the `t2a-elevenlabs` or `t2a-kokoro` nodes in order to
964
985
  > translate with a corresponding voice.
965
986
 
@@ -976,9 +997,9 @@ The following nodes process any type of chunk, i.e., both audio and text chunks.
976
997
  | **op** | 3 | "==" | `/^(?:<\|<=\|==\|!=\|~~\|!~\|>=\|>)$/` |
977
998
  | **val** | 4 | "" | `/^.*$/` |
978
999
 
979
- - Node: **trace**<br/>
1000
+ - Node: **x2x-trace**<br/>
980
1001
  Purpose: **data flow tracing**<br/>
981
- Example: `trace(type: "audio")`<br/>
1002
+ Example: `x2x-trace(type: "audio")`<br/>
982
1003
 
983
1004
  > This node allows you to trace the audio and text chunk flow through
984
1005
  > the **SpeechFlow** graph. It just passes through its chunks, but
@@ -1000,33 +1021,33 @@ REST/WebSocket API
1000
1021
  **SpeechFlow** has an externally exposed REST/WebSockets API which can
1001
1022
  be used to control the nodes and to receive information from nodes.
1002
1023
  For controlling a node you have three possibilities (illustrated by
1003
- controlling the mode of the "mute" node):
1024
+ controlling the mode of the "a2a-mute" node):
1004
1025
 
1005
1026
  ```sh
1006
1027
  # use HTTP/REST/GET:
1007
- $ curl http://127.0.0.1:8484/api/COMMAND/mute/mode/silenced
1028
+ $ curl http://127.0.0.1:8484/api/COMMAND/a2a-mute/mode/silenced
1008
1029
  ```
1009
1030
 
1010
1031
  ```sh
1011
1032
  # use HTTP/REST/POST:
1012
1033
  $ curl -H "Content-type: application/json" \
1013
- --data '{ "request": "COMMAND", "node": "mute", "args": [ "mode", "silenced" ] }' \
1034
+ --data '{ "request": "COMMAND", "node": "a2a-mute", "args": [ "mode", "silenced" ] }' \
1014
1035
  http://127.0.0.1:8484/api
1015
1036
  ```
1016
1037
 
1017
1038
  ```sh
1018
1039
  # use WebSockets:
1019
1040
  $ wscat -c ws://127.0.0.1:8484/api \
1020
- > { "request": "COMMAND", "node": "mute", "args": [ "mode", "silenced" ] }
1041
+ > { "request": "COMMAND", "node": "a2a-mute", "args": [ "mode", "silenced" ] }
1021
1042
  ```
1022
1043
 
1023
1044
  For receiving emitted information from nodes, you have to use the WebSockets
1024
- API (illustrated by the emitted information of the "meter" node):
1045
+ API (illustrated by the emitted information of the "a2a-meter" node):
1025
1046
 
1026
1047
  ```sh
1027
1048
  # use WebSockets:
1028
1049
  $ wscat -c ws://127.0.0.1:8484/api \
1029
- < { "response": "NOTIFY", "node": "meter", "args": [ "meter", "LUFS-S", -35.75127410888672 ] }
1050
+ < { "response": "NOTIFY", "node": "a2a-meter", "args": [ "meter", "LUFS-S", -35.75127410888672 ] }
1030
1051
  ```
1031
1052
 
1032
1053
  History