speechflow 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +210 -167
  3. package/etc/claude.md +83 -46
  4. package/etc/speechflow.yaml +84 -84
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +1 -1
  7. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +4 -4
  8. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +1 -1
  10. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +4 -4
  11. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  12. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.d.ts +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +5 -15
  14. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +1 -1
  16. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +4 -4
  17. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +1 -1
  19. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +3 -3
  20. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +3 -3
  23. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
  25. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +3 -3
  26. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-mute.d.ts +1 -1
  28. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +3 -3
  29. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +1 -1
  31. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +3 -3
  32. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  33. package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +1 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +3 -3
  35. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-vad.d.ts +1 -1
  37. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
  38. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  39. package/speechflow-cli/dst/speechflow-node-a2a-wav.d.ts +1 -1
  40. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +3 -3
  41. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  42. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +18 -0
  43. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +312 -0
  44. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -0
  45. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +1 -1
  46. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +7 -12
  47. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -1
  48. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +1 -1
  49. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +4 -4
  50. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  51. package/speechflow-cli/dst/speechflow-node-a2t-openai.d.ts +19 -0
  52. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +351 -0
  53. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -0
  54. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +1 -1
  55. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +6 -6
  56. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -1
  57. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +16 -0
  58. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +204 -0
  59. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -0
  60. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +1 -1
  61. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +40 -7
  62. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -1
  63. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  64. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -5
  65. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  66. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +41 -7
  68. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node-t2t-amazon.d.ts +13 -0
  70. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +175 -0
  71. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -0
  72. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +1 -1
  73. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +39 -5
  74. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -1
  75. package/speechflow-cli/dst/speechflow-node-t2t-deepl.d.ts +1 -1
  76. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +6 -5
  77. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  78. package/speechflow-cli/dst/speechflow-node-t2t-format.d.ts +1 -1
  79. package/speechflow-cli/dst/speechflow-node-t2t-format.js +3 -3
  80. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  81. package/speechflow-cli/dst/speechflow-node-t2t-google.d.ts +13 -0
  82. package/speechflow-cli/dst/speechflow-node-t2t-google.js +153 -0
  83. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -0
  84. package/speechflow-cli/dst/speechflow-node-t2t-modify.d.ts +11 -0
  85. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +111 -0
  86. package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -0
  87. package/speechflow-cli/dst/speechflow-node-t2t-ollama.d.ts +1 -1
  88. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +39 -5
  89. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  90. package/speechflow-cli/dst/speechflow-node-t2t-openai.d.ts +1 -1
  91. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +39 -5
  92. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  93. package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -1
  94. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +3 -3
  95. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.d.ts +1 -1
  97. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +6 -5
  98. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  99. package/speechflow-cli/dst/speechflow-node-t2t-transformers.d.ts +1 -1
  100. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +6 -5
  101. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
  102. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -1
  103. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +3 -3
  104. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  105. package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -1
  106. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +3 -3
  107. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  108. package/speechflow-cli/dst/speechflow-node-xio-device.d.ts +1 -1
  109. package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -3
  110. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  111. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -1
  112. package/speechflow-cli/dst/speechflow-node-xio-file.js +43 -22
  113. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  114. package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -1
  115. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +3 -3
  116. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  117. package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -1
  118. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +3 -3
  119. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  120. package/speechflow-cli/dst/speechflow-utils.d.ts +16 -0
  121. package/speechflow-cli/dst/speechflow-utils.js +140 -1
  122. package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
  123. package/speechflow-cli/dst/speechflow.js +19 -19
  124. package/speechflow-cli/dst/speechflow.js.map +1 -1
  125. package/speechflow-cli/etc/biome.jsonc +2 -1
  126. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  127. package/speechflow-cli/package.json +16 -15
  128. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +4 -4
  129. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +4 -4
  130. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -14
  131. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +4 -4
  132. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +2 -2
  133. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +2 -2
  134. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
  135. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +2 -2
  136. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +2 -2
  137. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +2 -2
  138. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +2 -2
  139. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +2 -2
  140. package/speechflow-cli/src/{speechflow-node-a2t-awstranscribe.ts → speechflow-node-a2t-amazon.ts} +11 -13
  141. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +4 -4
  142. package/speechflow-cli/src/{speechflow-node-a2t-openaitranscribe.ts → speechflow-node-a2t-openai.ts} +7 -7
  143. package/speechflow-cli/src/{speechflow-node-t2a-awspolly.ts → speechflow-node-t2a-amazon.ts} +8 -8
  144. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +4 -4
  145. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +7 -6
  146. package/speechflow-cli/src/{speechflow-node-t2t-awstranslate.ts → speechflow-node-t2t-amazon.ts} +6 -5
  147. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +5 -4
  148. package/speechflow-cli/src/speechflow-node-t2t-format.ts +2 -2
  149. package/speechflow-cli/src/speechflow-node-t2t-google.ts +133 -0
  150. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +84 -0
  151. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +5 -4
  152. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +5 -4
  153. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +2 -2
  154. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +10 -9
  155. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +5 -4
  156. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -2
  157. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +2 -2
  158. package/speechflow-cli/src/speechflow-node-xio-device.ts +2 -2
  159. package/speechflow-cli/src/speechflow-node-xio-file.ts +43 -21
  160. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
  161. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +2 -2
  162. package/speechflow-cli/src/speechflow-utils.ts +196 -1
  163. package/speechflow-cli/src/speechflow.ts +22 -22
  164. package/speechflow-ui-db/dst/app-font-fa-brands-400.woff2 +0 -0
  165. package/speechflow-ui-db/dst/app-font-fa-regular-400.woff2 +0 -0
  166. package/speechflow-ui-db/dst/app-font-fa-solid-900.woff2 +0 -0
  167. package/speechflow-ui-db/dst/app-font-fa-v4compatibility.woff2 +0 -0
  168. package/speechflow-ui-db/dst/index.css +2 -2
  169. package/speechflow-ui-db/dst/index.js +37 -38
  170. package/speechflow-ui-db/package.json +10 -10
  171. package/speechflow-ui-st/dst/app-font-fa-brands-400.woff2 +0 -0
  172. package/speechflow-ui-st/dst/app-font-fa-regular-400.woff2 +0 -0
  173. package/speechflow-ui-st/dst/app-font-fa-solid-900.woff2 +0 -0
  174. package/speechflow-ui-st/dst/app-font-fa-v4compatibility.woff2 +0 -0
  175. package/speechflow-ui-st/dst/index.css +2 -2
  176. package/speechflow-ui-st/dst/index.js +32 -33
  177. package/speechflow-ui-st/package.json +11 -11
package/README.md CHANGED
@@ -45,12 +45,14 @@ cloud-based [ElevenLabs](https://elevenlabs.io/) text-to-speech conversion,
45
45
  cloud-based [Amazon Polly](https://aws.amazon.com/polly/) text-to-speech conversion,
46
46
  cloud-based [DeepL](https://deepl.com) text-to-text translation,
47
47
  cloud-based [Amazon Translate](https://aws.amazon.com/translate/) text-to-text translation,
48
+ cloud-based [Google Cloud Translate](https://cloud.google.com/translate) text-to-text translation,
48
49
  cloud-based [OpenAI/GPT](https://openai.com) text-to-text translation (or spelling correction),
49
50
  local [Ollama/Gemma](https://ollama.com) text-to-text translation (or spelling correction),
50
51
  local [OPUS/ONNX](https://github.com/Helsinki-NLP/Opus-MT) text-to-text translation,
51
52
  local [FFmpeg](https://ffmpeg.org/) speech-to-speech encoding,
52
53
  local WAV speech-to-speech encoding,
53
54
  local text-to-text formatting,
55
+ local text-to-text regex-based modification,
54
56
  local text-to-text sentence merging/splitting,
55
57
  local text-to-text subtitle generation,
56
58
  local text or audio filter, and
@@ -74,18 +76,18 @@ and real-time translated to English.
74
76
  First, the used configuration was a straight linear pipeline in file `sample.conf`:
75
77
 
76
78
  ```txt
77
- device(device: "coreaudio:Elgato Wave:3", mode: "r") |
78
- meter(interval: 50, dashboard: "meter1") |
79
- deepgram(language: "de", model: "nova-2", interim: true) |
80
- trace(type: "text", dashboard: "text1") |
81
- filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
82
- sentence() |
83
- trace(type: "text", dashboard: "text2") |
84
- deepl(src: "de", dst: "en") |
85
- trace(type: "text", dashboard: "text3") |
86
- elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
87
- meter(interval: 50, dashboard: "meter2") |
88
- device(device: "coreaudio:USBAudio2.0", mode: "w")
79
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") |
80
+ a2a-meter(interval: 50, dashboard: "meter1") |
81
+ a2t-deepgram(language: "de", model: "nova-2", interim: true) |
82
+ x2x-trace(type: "text", dashboard: "text1") |
83
+ x2x-filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
84
+ t2t-sentence() |
85
+ x2x-trace(type: "text", dashboard: "text2") |
86
+ t2t-deepl(src: "de", dst: "en") |
87
+ x2x-trace(type: "text", dashboard: "text3") |
88
+ t2a-elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
89
+ a2a-meter(interval: 50, dashboard: "meter2") |
90
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
89
91
  ```
90
92
 
91
93
  Second, the corresponding **SpeechFlow** command was:
@@ -99,13 +101,13 @@ Finally, the resulting dashboard under URL `http://127.0.0.1:8484/` was:
99
101
 
100
102
  ![dashboard](etc/speechflow.png)
101
103
 
102
- On the left you can see the volume meter of the microphone (`device`),
104
+ On the left you can see the volume meter of the microphone (`xio-device`),
103
105
  followed by the German result of the speech-to-text conversion
104
- (`deepgram`), followed by the still German results of the text-to-text
105
- sentence splitting/aggregation (`sentence`), followed by the English
106
- results of the text-to-text translation (`deepl`) and then finally on
106
+ (`a2t-deepgram`), followed by the still German results of the text-to-text
107
+ sentence splitting/aggregation (`t2t-sentence`), followed by the English
108
+ results of the text-to-text translation (`t2t-deepl`) and then finally on
107
109
  the right you can see the volume meter of the text-to-speech conversion
108
- (`elevenlabs`).
110
+ (`t2a-elevenlabs`).
109
111
 
110
112
  The entire **SpeechFlow** processing pipeline runs in real-time and
111
113
  the latency between input and output audio is about 2-3 seconds, very
@@ -187,92 +189,92 @@ They can also be found in the sample [speechflow.yaml](./etc/speechflow.yaml) fi
187
189
  - **Capturing**: Capture audio from microphone device into WAV audio file:
188
190
 
189
191
  ```
190
- device(device: "wasapi:VoiceMeeter Out B1", mode: "r") |
191
- wav(mode: "encode") |
192
- file(path: "capture.wav", mode: "w", type: "audio")
192
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") |
193
+ a2a-wav(mode: "encode") |
194
+ xio-file(path: "capture.wav", mode: "w", type: "audio")
193
195
  ```
194
196
 
195
197
  - **Pass-Through**: Pass-through audio from microphone device to speaker
196
198
  device and in parallel record it to WAV audio file:
197
199
 
198
200
  ```
199
- device(device: "wasapi:VoiceMeeter Out B1", mode: "r") | {
200
- wav(mode: "encode") |
201
- file(path: "capture.wav", mode: "w", type: "audio"),
202
- device(device: "wasapi:VoiceMeeter VAIO3 Input", mode: "w")
201
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") | {
202
+ a2a-wav(mode: "encode") |
203
+ xio-file(path: "capture.wav", mode: "w", type: "audio"),
204
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
203
205
  }
204
206
  ```
205
207
 
206
208
  - **Transcription**: Generate text file with German transcription of MP3 audio file:
207
209
 
208
210
  ```
209
- file(path: argv.0, mode: "r", type: "audio") |
210
- ffmpeg(src: "mp3", dst: "pcm") |
211
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
212
- format(width: 80) |
213
- file(path: argv.1, mode: "w", type: "text")
211
+ xio-file(path: argv.0, mode: "r", type: "audio") |
212
+ a2a-ffmpeg(src: "mp3", dst: "pcm") |
213
+ a2t-deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
214
+ t2t-format(width: 80) |
215
+ xio-file(path: argv.1, mode: "w", type: "text")
214
216
  ```
215
217
 
216
218
  - **Subtitling**: Generate text file with German subtitles of MP3 audio file:
217
219
 
218
220
  ```
219
- file(path: argv.0, mode: "r", type: "audio") |
220
- ffmpeg(src: "mp3", dst: "pcm") |
221
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
222
- subtitle(format: "vtt") |
223
- file(path: argv.1, mode: "w", type: "text")
221
+ xio-file(path: argv.0, mode: "r", type: "audio") |
222
+ a2a-ffmpeg(src: "mp3", dst: "pcm") |
223
+ a2t-deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
224
+ t2t-subtitle(format: "vtt") |
225
+ xio-file(path: argv.1, mode: "w", type: "text")
224
226
  ```
225
227
 
226
228
  - **Speaking**: Generate audio file with English voice for a text file:
227
229
 
228
230
  ```
229
- file(path: argv.0, mode: "r", type: "text") |
230
- kokoro(language: "en") |
231
- wav(mode: "encode") |
232
- file(path: argv.1, mode: "w", type: "audio")
231
+ xio-file(path: argv.0, mode: "r", type: "text") |
232
+ t2a-kokoro(language: "en") |
233
+ a2a-wav(mode: "encode") |
234
+ xio-file(path: argv.1, mode: "w", type: "audio")
233
235
  ```
234
236
 
235
237
  - **Ad-Hoc Translation**: Ad-Hoc text translation from German to English
236
238
  via stdin/stdout:
237
239
 
238
240
  ```
239
- file(path: "-", mode: "r", type: "text") |
240
- deepl(src: "de", dst: "en") |
241
- file(path: "-", mode: "w", type: "text")
241
+ xio-file(path: "-", mode: "r", type: "text") |
242
+ t2t-deepl(src: "de", dst: "en") |
243
+ xio-file(path: "-", mode: "w", type: "text")
242
244
  ```
243
245
 
244
246
  - **Studio Translation**: Real-time studio translation from German to English,
245
247
  including the capturing of all involved inputs and outputs:
246
248
 
247
249
  ```
248
- device(device: "coreaudio:Elgato Wave:3", mode: "r") | {
249
- gender() | {
250
- meter(interval: 250) |
251
- wav(mode: "encode") |
252
- file(path: "program-de.wav", mode: "w", type: "audio"),
253
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) | {
254
- sentence() | {
255
- format(width: 80) |
256
- file(path: "program-de.txt", mode: "w", type: "text"),
257
- deepl(src: "de", dst: "en", key: env.SPEECHFLOW_DEEPL_KEY) | {
258
- trace(name: "text", type: "text") | {
259
- format(width: 80) |
260
- file(path: "program-en.txt", mode: "w", type: "text"),
261
- subtitle(format: "srt") |
262
- file(path: "program-en.srt", mode: "w", type: "text"),
263
- mqtt(url: "mqtt://10.1.0.10:1883",
250
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") | {
251
+ a2a-gender() | {
252
+ a2a-meter(interval: 250) |
253
+ a2a-wav(mode: "encode") |
254
+ xio-file(path: "program-de.wav", mode: "w", type: "audio"),
255
+ a2t-deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) | {
256
+ t2t-sentence() | {
257
+ t2t-format(width: 80) |
258
+ xio-file(path: "program-de.txt", mode: "w", type: "text"),
259
+ t2t-deepl(src: "de", dst: "en", key: env.SPEECHFLOW_DEEPL_KEY) | {
260
+ x2x-trace(name: "text", type: "text") | {
261
+ t2t-format(width: 80) |
262
+ xio-file(path: "program-en.txt", mode: "w", type: "text"),
263
+ t2t-subtitle(format: "srt") |
264
+ xio-file(path: "program-en.srt", mode: "w", type: "text"),
265
+ xio-mqtt(url: "mqtt://10.1.0.10:1883",
264
266
  username: env.SPEECHFLOW_MQTT_USER,
265
267
  password: env.SPEECHFLOW_MQTT_PASS,
266
268
  topicWrite: "stream/studio/sender"),
267
269
  {
268
- filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
269
- elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
270
- filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
271
- elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
270
+ x2x-filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
271
+ t2a-elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
272
+ x2x-filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
273
+ t2a-elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
272
274
  } | {
273
- wav(mode: "encode") |
274
- file(path: "program-en.wav", mode: "w", type: "audio"),
275
- device(device: "coreaudio:USBAudio2.0", mode: "w")
275
+ a2a-wav(mode: "encode") |
276
+ xio-file(path: "program-en.wav", mode: "w", type: "audio"),
277
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
276
278
  }
277
279
  }
278
280
  }
@@ -288,51 +290,54 @@ Processing Node Types
288
290
  First a short overview of the available processing nodes:
289
291
 
290
292
  - Input/Output nodes:
291
- **file**,
292
- **device**,
293
- **websocket**,
294
- **mqtt**.
293
+ **xio-file**,
294
+ **xio-device**,
295
+ **xio-websocket**,
296
+ **xio-mqtt**.
295
297
  - Audio-to-Audio nodes:
296
- **ffmpeg**,
297
- **wav**,
298
- **mute**,
299
- **meter**,
300
- **vad**,
301
- **gender**,
302
- **speex**,
303
- **rrnoise**,
304
- **compressor**,
305
- **expander**,
306
- **gain**,
307
- **filler**.
298
+ **a2a-ffmpeg**,
299
+ **a2a-wav**,
300
+ **a2a-mute**,
301
+ **a2a-meter**,
302
+ **a2a-vad**,
303
+ **a2a-gender**,
304
+ **a2a-speex**,
305
+ **a2a-rnnoise**,
306
+ **a2a-compressor**,
307
+ **a2a-expander**,
308
+ **a2a-gain**,
309
+ **a2a-filler**.
308
310
  - Audio-to-Text nodes:
309
- **openaitranscribe**,
310
- **awstranscribe**,
311
- **deepgram**.
311
+ **a2t-openai**,
312
+ **a2t-amazon**,
313
+ **a2t-deepgram**.
312
314
  - Text-to-Text nodes:
313
- **deepl**,
314
- **awstranslate**,
315
- **openai**,
316
- **ollama**,
317
- **transformers**,
318
- **subtitle**,
319
- **format**.
315
+ **t2t-deepl**,
316
+ **t2t-amazon**,
317
+ **t2t-openai**,
318
+ **t2t-ollama**,
319
+ **t2t-transformers**,
320
+ **t2t-google**,
321
+ **t2t-modify**,
322
+ **t2t-subtitle**,
323
+ **t2t-format**,
324
+ **t2t-sentence**.
320
325
  - Text-to-Audio nodes:
321
- **awspolly**.
322
- **elevenlabs**.
323
- **kokoro**.
326
+ **t2a-amazon**,
327
+ **t2a-elevenlabs**,
328
+ **t2a-kokoro**.
324
329
  - Any-to-Any nodes:
325
- **filter**,
326
- **trace**.
330
+ **x2x-filter**,
331
+ **x2x-trace**.
327
332
 
328
333
  ### Input/Output Nodes
329
334
 
330
335
  The following nodes are for external I/O, i.e., to read/write from
331
336
  external files, devices and network services.
332
337
 
333
- - Node: **file**<br/>
338
+ - Node: **xio-file**<br/>
334
339
  Purpose: **File and StdIO source/sink**<br/>
335
- Example: `file(path: "capture.pcm", mode: "w", type: "audio")`
340
+ Example: `xio-file(path: "capture.pcm", mode: "w", type: "audio")`
336
341
 
337
342
  > This node allows the reading/writing from/to files or from StdIO. It
338
343
  > is intended to be used as source and sink nodes in batch processing,
@@ -351,9 +356,9 @@ external files, devices and network services.
351
356
  | **chunka** | | 200 | `10 <= n <= 1000` |
352
357
  | **chunkt** | | 65536 | `1024 <= n <= 131072` |
353
358
 
354
- - Node: **device**<br/>
359
+ - Node: **xio-device**<br/>
355
360
  Purpose: **Microphone/speaker device source/sink**<br/>
356
- Example: `device(device: "wasapi:VoiceMeeter Out B1", mode: "r")`
361
+ Example: `xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r")`
357
362
 
358
363
  > This node allows the reading/writing from/to audio devices. It is
359
364
  > intended to be used as source nodes for microphone devices and as
@@ -370,9 +375,9 @@ external files, devices and network services.
370
375
  | **mode** | 1 | "rw" | `/^(?:r\|w\|rw)$/` |
371
376
  | **chunk** | 2 | 200 | `10 <= n <= 1000` |
372
377
 
373
- - Node: **websocket**<br/>
378
+ - Node: **xio-websocket**<br/>
374
379
  Purpose: **WebSocket source/sink**<br/>
375
- Example: `websocket(connect: "ws://127.0.0.1:12345", type: "text")`
380
+ Example: `xio-websocket(connect: "ws://127.0.0.1:12345", type: "text")`
376
381
  Notice: this node requires a peer WebSocket service!
377
382
 
378
383
  > This node allows reading/writing from/to WebSocket network services.
@@ -391,9 +396,9 @@ external files, devices and network services.
391
396
  | **connect** | *none* | *none* | `/^(?:\|ws:\/\/(.+?):(\d+)(?:\/.*)?)$/` |
392
397
  | **type** | *none* | "audio" | `/^(?:audio\|text)$/` |
393
398
 
394
- - Node: **mqtt**<br/>
399
+ - Node: **xio-mqtt**<br/>
395
400
  Purpose: **MQTT sink**<br/>
396
- Example: `mqtt(url: "mqtt://127.0.0.1:1883", username: "foo", password: "bar", topic: "quux")`
401
+ Example: `xio-mqtt(url: "mqtt://127.0.0.1:1883", username: "foo", password: "bar", topic: "quux")`
397
402
  Notice: this node requires a peer MQTT broker!
398
403
 
399
404
  > This node allows reading/writing from/to MQTT broker topics. It is
@@ -416,9 +421,9 @@ external files, devices and network services.
416
421
 
417
422
  The following nodes process audio chunks only.
418
423
 
419
- - Node: **ffmpeg**<br/>
424
+ - Node: **a2a-ffmpeg**<br/>
420
425
  Purpose: **FFmpeg audio format conversion**<br/>
421
- Example: `ffmpeg(src: "pcm", dst: "mp3")`
426
+ Example: `a2a-ffmpeg(src: "pcm", dst: "mp3")`
422
427
 
423
428
  > This node allows converting between audio formats. It is primarily
424
429
  > intended to support the reading/writing of external MP3 and Opus
@@ -434,9 +439,9 @@ The following nodes process audio chunks only.
434
439
  | **src** | 0 | "pcm" | `/^(?:pcm\|wav\|mp3\|opus)$/` |
435
440
  | **dst** | 1 | "wav" | `/^(?:pcm\|wav\|mp3\|opus)$/` |
436
441
 
437
- - Node: **wav**<br/>
442
+ - Node: **a2a-wav**<br/>
438
443
  Purpose: **WAV audio format conversion**<br/>
439
- Example: `wav(mode: "encode")`
444
+ Example: `a2a-wav(mode: "encode")`
440
445
 
441
446
  > This node allows converting between PCM and WAV audio formats. It is
442
447
  > primarily intended to support the reading/writing of external WAV
@@ -451,9 +456,9 @@ The following nodes process audio chunks only.
451
456
  | ----------- | --------- | -------- | ------------------------ |
452
457
  | **mode** | 0 | "encode" | `/^(?:encode\|decode)$/` |
453
458
 
454
- - Node: **mute**<br/>
459
+ - Node: **a2a-mute**<br/>
455
460
  Purpose: **volume muting node**<br/>
456
- Example: `mute()`
461
+ Example: `a2a-mute()`
457
462
  Notice: this node has to be externally controlled via REST/WebSockets!
458
463
 
459
464
  > This node allows muting the audio stream by either silencing or even
@@ -467,9 +472,9 @@ The following nodes process audio chunks only.
467
472
  | Parameter | Position | Default | Requirement |
468
473
  | ----------- | --------- | -------- | ------------------------ |
469
474
 
470
- - Node: **meter**<br/>
475
+ - Node: **a2a-meter**<br/>
471
476
  Purpose: **Loudness metering node**<br/>
472
- Example: `meter(250)`
477
+ Example: `a2a-meter(250)`
473
478
 
474
479
  > This node allows measuring the loudness of the audio stream. The
475
480
  > results are emitted to both the logfile of **SpeechFlow** and the
@@ -484,9 +489,9 @@ The following nodes process audio chunks only.
484
489
  | ----------- | --------- | -------- | ------------------------ |
485
490
  | **interval** | 0 | 250 | *none* |
486
491
 
487
- - Node: **vad**<br/>
492
+ - Node: **a2a-vad**<br/>
488
493
  Purpose: **Voice Audio Detection (VAD) node**<br/>
489
- Example: `vad()`
494
+ Example: `a2a-vad()`
490
495
 
491
496
  > This node performs Voice Audio Detection (VAD), i.e., it detects
492
497
  > voice in the audio stream and if not detected either silences or
@@ -507,9 +512,9 @@ The following nodes process audio chunks only.
507
512
  | **preSpeechPadFrames** | *none* | 1 | *none* |
508
513
  | **postSpeechTail** | *none* | 1500 | *none* |
509
514
 
510
- - Node: **gender**<br/>
515
+ - Node: **a2a-gender**<br/>
511
516
  Purpose: **Gender Detection node**<br/>
512
- Example: `gender()`
517
+ Example: `a2a-gender()`
513
518
 
514
519
  > This node performs gender detection on the audio stream. It
515
520
  > annotates the audio chunks with `gender=male` or `gender=female`
@@ -524,9 +529,9 @@ The following nodes process audio chunks only.
524
529
  | ----------- | --------- | -------- | ------------------------ |
525
530
  | **window** | 0 | 500 | *none* |
526
531
 
527
- - Node: **speex**<br/>
532
+ - Node: **a2a-speex**<br/>
528
533
  Purpose: **Speex Noise Suppression node**<br/>
529
- Example: `speex(attentuate: -18)`
534
+ Example: `a2a-speex(attentuate: -18)`
530
535
 
531
536
  > This node uses the Speex DSP pre-processor to perform noise
532
537
  > suppression, i.e., it detects and attenuates (by a certain level of
@@ -541,9 +546,9 @@ The following nodes process audio chunks only.
541
546
  | ----------- | --------- | -------- | ------------------------ |
542
547
  | **attentuate** | 0 | -18 | `-60 <= n <= 0` |
543
548
 
544
- - Node: **rnnoise**<br/>
549
+ - Node: **a2a-rnnoise**<br/>
545
550
  Purpose: **RNNoise Noise Suppression node**<br/>
546
- Example: `rnnoise()`
551
+ Example: `a2a-rnnoise()`
547
552
 
548
553
  > This node uses RNNoise to perform noise suppression, i.e., it
549
554
  > detects and attenuates the noise in the audio stream.
@@ -556,9 +561,9 @@ The following nodes process audio chunks only.
556
561
  | Parameter | Position | Default | Requirement |
557
562
  | ----------- | --------- | -------- | ------------------------ |
558
563
 
559
- - Node: **compressor**<br/>
564
+ - Node: **a2a-compressor**<br/>
560
565
  Purpose: **audio compressor node**<br/>
561
- Example: `compressor(thresholdDb: -18)`
566
+ Example: `a2a-compressor(thresholdDb: -18)`
562
567
 
563
568
  > This node applies a dynamics compressor, i.e., it attenuates the
564
569
  > volume by a certain ratio whenever the volume is above the threshold.
@@ -577,9 +582,9 @@ The following nodes process audio chunks only.
577
582
  | **kneeDb** | *none* | 6 | `n >= 0 && n <= 100` |
578
583
  | **makeupDb** | *none* | 0 | `n >= 0 && n <= 100` |
579
584
 
580
- - Node: **expander**<br/>
585
+ - Node: **a2a-expander**<br/>
581
586
  Purpose: **audio expander node**<br/>
582
- Example: `expander(thresholdDb: -46)`
587
+ Example: `a2a-expander(thresholdDb: -46)`
583
588
 
584
589
  > This node applies a dynamics expander, i.e., it attenuates the
585
590
  > volume by a certain ratio whenever the volume is below the threshold.
@@ -598,9 +603,9 @@ The following nodes process audio chunks only.
598
603
  | **kneeDb** | *none* | 6 | `n >= 0 && n <= 100` |
599
604
  | **makeupDb** | *none* | 0 | `n >= 0 && n <= 100` |
600
605
 
601
- - Node: **gain**<br/>
606
+ - Node: **a2a-gain**<br/>
602
607
  Purpose: **audio gain adjustment node**<br/>
603
- Example: `gain(db: 12)`
608
+ Example: `a2a-gain(db: 12)`
604
609
 
605
610
  > This node applies a gain adjustment to audio, i.e., it increases or
606
611
  > decreases the volume by certain decibels
@@ -614,9 +619,9 @@ The following nodes process audio chunks only.
614
619
  | ----------- | --------- | -------- | ------------------------ |
615
620
  | **db** | *none* | 12 | `n >= -60 && n <= 60` |
616
621
 
617
- - Node: **filler**<br/>
622
+ - Node: **a2a-filler**<br/>
618
623
  Purpose: **audio filler node**<br/>
619
- Example: `filler()`
624
+ Example: `a2a-filler()`
620
625
 
621
626
  > This node adds missing audio frames of silence in order to fill
622
627
  > the chronological gaps between generated audio frames (from
@@ -634,9 +639,9 @@ The following nodes process audio chunks only.
634
639
 
635
640
  The following nodes convert audio to text chunks.
636
641
 
637
- - Node: **openaitranscribe**<br/>
642
+ - Node: **a2t-openai**<br/>
638
643
  Purpose: **OpenAI/GPT Speech-to-Text conversion**<br/>
639
- Example: `openaitranscribe(language: "de")`<br/>
644
+ Example: `a2t-openai(language: "de")`<br/>
640
645
  Notice: this node requires an OpenAI API key!
641
646
 
642
647
  > This node uses OpenAI GPT to perform Speech-to-Text (S2T)
@@ -656,9 +661,9 @@ The following nodes convert audio to text chunks.
656
661
  | **language** | *none* | "en" | `/^(?:de\|en)$/` |
657
662
  | **interim** | *none* | false | *none* |
658
663
 
659
- - Node: **awstranscribe**<br/>
664
+ - Node: **a2t-amazon**<br/>
660
665
  Purpose: **Amazon Transcribe Speech-to-Text conversion**<br/>
661
- Example: `awstranscribe(language: "de")`<br/>
666
+ Example: `a2t-amazon(language: "de")`<br/>
662
667
  Notice: this node requires an API key!
663
668
 
664
669
  > This node uses Amazon Transcribe to perform Speech-to-Text (S2T)
@@ -678,9 +683,9 @@ The following nodes convert audio to text chunks.
678
683
  | **language** | *none* | "en" | `/^(?:en\|de)$/` |
679
684
  | **interim** | *none* | false | *none* |
680
685
 
681
- - Node: **deepgram**<br/>
686
+ - Node: **a2t-deepgram**<br/>
682
687
  Purpose: **Deepgram Speech-to-Text conversion**<br/>
683
- Example: `deepgram(language: "de")`<br/>
688
+ Example: `a2t-deepgram(language: "de")`<br/>
684
689
  Notice: this node requires an API key!
685
690
 
686
691
  > This node performs Speech-to-Text (S2T) conversion, i.e., it
@@ -704,9 +709,9 @@ The following nodes convert audio to text chunks.
704
709
 
705
710
  The following nodes process text chunks only.
706
711
 
707
- - Node: **deepl**<br/>
712
+ - Node: **t2t-deepl**<br/>
708
713
  Purpose: **DeepL Text-to-Text translation**<br/>
709
- Example: `deepl(src: "de", dst: "en")`<br/>
714
+ Example: `t2t-deepl(src: "de", dst: "en")`<br/>
710
715
  Notice: this node requires an API key!
711
716
 
712
717
  > This node performs translation between English and German languages.
@@ -722,9 +727,9 @@ The following nodes process text chunks only.
722
727
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
723
728
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
724
729
 
725
- - Node: **awstranslate**<br/>
730
+ - Node: **t2t-amazon**<br/>
726
731
  Purpose: **AWS Translate Text-to-Text translation**<br/>
727
- Example: `awstranslate(src: "de", dst: "en")`<br/>
732
+ Example: `t2t-amazon(src: "de", dst: "en")`<br/>
728
733
  Notice: this node requires an API key!
729
734
 
730
735
  > This node performs translation between English and German languages.
@@ -742,9 +747,9 @@ The following nodes process text chunks only.
742
747
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
743
748
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
744
749
 
745
- - Node: **openai**<br/>
750
+ - Node: **t2t-openai**<br/>
746
751
  Purpose: **OpenAI/GPT Text-to-Text translation and spelling correction**<br/>
747
- Example: `openai(src: "de", dst: "en")`<br/>
752
+ Example: `t2t-openai(src: "de", dst: "en")`<br/>
748
753
  Notice: this node requires an OpenAI API key!
749
754
 
750
755
  > This node performs translation between English and German languages
@@ -766,9 +771,9 @@ The following nodes process text chunks only.
766
771
  | **key** | *none* | env.SPEECHFLOW\_OPENAI\_KEY | *none* |
767
772
  | **model** | *none* | "gpt-4o-mini" | *none* |
768
773
 
769
- - Node: **ollama**<br/>
774
+ - Node: **t2t-ollama**<br/>
770
775
  Purpose: **Ollama/Gemma Text-to-Text translation and spelling correction**<br/>
771
- Example: `ollama(src: "de", dst: "en")`<br/>
776
+ Example: `t2t-ollama(src: "de", dst: "en")`<br/>
772
777
  Notice: this node requires Ollama to be installed!
773
778
 
774
779
  > This node performs translation between English and German languages
@@ -789,9 +794,9 @@ The following nodes process text chunks only.
789
794
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
790
795
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
791
796
 
792
- - Node: **transformers**<br/>
797
+ - Node: **t2t-transformers**<br/>
793
798
  Purpose: **Transformers Text-to-Text translation**<br/>
794
- Example: `transformers(src: "de", dst: "en")`<br/>
799
+ Example: `t2t-transformers(src: "de", dst: "en")`<br/>
795
800
 
796
801
  > This node performs translation between English and German languages
797
802
  > in the text stream. It is based on local OPUS or SmolLM3 LLMs.
@@ -807,13 +812,51 @@ The following nodes process text chunks only.
807
812
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
808
813
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
809
814
 
810
- - Node: **sentence**<br/>
815
+ - Node: **t2t-google**<br/>
816
+ Purpose: **Google Cloud Translate Text-to-Text translation**<br/>
817
+ Example: `t2t-google(src: "de", dst: "en")`<br/>
818
+ Notice: this node requires a Google Cloud API key and project ID!
819
+
820
+ > This node performs translation between multiple languages
821
+ > in the text stream using Google Cloud Translate API.
822
+ > It supports German, English, French, and Italian languages.
823
+
824
+ | Port | Payload |
825
+ | ------- | ----------- |
826
+ | input | text |
827
+ | output | text |
828
+
829
+ | Parameter | Position | Default | Requirement |
830
+ | ------------ | --------- | -------- | ------------------ |
831
+ | **key** | *none* | env.SPEECHFLOW\_GOOGLE\_KEY | *none* |
832
+ | **src** | 0 | "de" | `/^(?:de\|en\|fr\|it)$/` |
833
+ | **dst** | 1 | "en" | `/^(?:de\|en\|fr\|it)$/` |
834
+
835
+ - Node: **t2t-modify**<br/>
836
+ Purpose: **regex-based text modification**<br/>
837
+ Example: `t2t-modify(match: "\\b(hello)\\b", replace: "hi $1")`<br/>
838
+
839
+ > This node allows regex-based modification of text chunks using pattern
840
+ > matching and replacement with support for $n backreferences. It is
841
+ > primarily intended for text preprocessing, cleanup, or transformation tasks.
842
+
843
+ | Port | Payload |
844
+ | ------- | ----------- |
845
+ | input | text |
846
+ | output | text |
847
+
848
+ | Parameter | Position | Default | Requirement |
849
+ | ------------ | --------- | -------- | ------------------ |
850
+ | **match** | 0 | "" | *required* |
851
+ | **replace** | 1 | "" | *required* |
852
+
853
+ - Node: **t2t-sentence**<br/>
811
854
  Purpose: **sentence splitting/merging**<br/>
812
- Example: `sentence()`<br/>
855
+ Example: `t2t-sentence()`<br/>
813
856
 
814
857
  > This node allows you to ensure that a text stream is split or merged
815
858
  > into complete sentences. It is primarily intended to be used after
816
- > the "deepgram" node and before "deepl" or "elevenlabs" nodes in
859
+ > the "a2t-deepgram" node and before "t2t-deepl" or "t2a-elevenlabs" nodes in
817
860
  > order to improve overall quality.
818
861
 
819
862
  | Port | Payload |
@@ -824,9 +867,9 @@ The following nodes process text chunks only.
824
867
  | Parameter | Position | Default | Requirement |
825
868
  | ------------ | --------- | -------- | ------------------ |
826
869
 
827
- - Node: **subtitle**<br/>
870
+ - Node: **t2t-subtitle**<br/>
828
871
  Purpose: **SRT/VTT Subtitle Generation**<br/>
829
- Example: `subtitle(format: "srt")`<br/>
872
+ Example: `t2t-subtitle(format: "srt")`<br/>
830
873
 
831
874
  > This node generates subtitles from the text stream (and its embedded
832
875
  > timestamps) in the formats SRT (SubRip) or VTT (WebVTT).
@@ -841,9 +884,9 @@ The following nodes process text chunks only.
841
884
  | **format** | *none* | "srt" | `/^(?:srt\|vtt)$/` |
842
885
  | **words** | *none* | false | *none* |
843
886
 
844
- - Node: **format**<br/>
887
+ - Node: **t2t-format**<br/>
845
888
  Purpose: **text paragraph formatting**<br/>
846
- Example: `format(width: 80)`<br/>
889
+ Example: `t2t-format(width: 80)`<br/>
847
890
 
848
891
  > This node formats the text stream into lines no longer than a
849
892
  > certain width. It is primarily intended for use before writing text
@@ -862,9 +905,9 @@ The following nodes process text chunks only.
862
905
 
863
906
  The following nodes convert text chunks to audio chunks.
864
907
 
865
- - Node: **awspolly**<br/>
908
+ - Node: **t2a-amazon**<br/>
866
909
  Purpose: **Amazon Polly Text-to-Speech conversion**<br/>
867
- Example: `awspolly(language: "en", voice: "Danielle)`<br/>
910
+ Example: `t2a-amazon(language: "en", voice: "Danielle")`<br/>
868
911
  Notice: this node requires an Amazon API key!
869
912
 
870
913
  > This node uses Amazon Polly to perform Text-to-Speech (T2S)
@@ -884,9 +927,9 @@ The following nodes convert text chunks to audio chunks.
884
927
  | **voice** | 0 | "Amy" | `/^(?:Amy\|Danielle\|Joanna\|Matthew\|Ruth\|Stephen\|Viki\|Daniel)$/` |
885
928
  | **language** | 1 | "en" | `/^(?:de\|en)$/` |
886
929
 
887
- - Node: **elevenlabs**<br/>
930
+ - Node: **t2a-elevenlabs**<br/>
888
931
  Purpose: **ElevenLabs Text-to-Speech conversion**<br/>
889
- Example: `elevenlabs(language: "en")`<br/>
932
+ Example: `t2a-elevenlabs(language: "en")`<br/>
890
933
  Notice: this node requires an ElevenLabs API key!
891
934
 
892
935
  > This node uses ElevenLabs to perform Text-to-Speech (T2S)
@@ -908,9 +951,9 @@ The following nodes convert text chunks to audio chunks.
908
951
  | **similarity** | 4 | 0.75 | `n >= 0.0 && n <= 1.0` |
909
952
  | **optimize** | 5 | "latency" | `/^(?:latency\|quality)$/` |
910
953
 
911
- - Node: **kokoro**<br/>
954
+ - Node: **t2a-kokoro**<br/>
912
955
  Purpose: **Kokoro Text-to-Speech conversion**<br/>
913
- Example: `kokoro(language: "en")`<br/>
956
+ Example: `t2a-kokoro(language: "en")`<br/>
914
957
  Notice: this currently supports English language only!
915
958
 
916
959
  > This node uses Kokoro to perform Text-to-Speech (T2S) conversion,
@@ -932,12 +975,12 @@ The following nodes convert text chunks to audio chunks.
932
975
 
933
976
  The following nodes process any type of chunk, i.e., both audio and text chunks.
934
977
 
935
- - Node: **filter**<br/>
978
+ - Node: **x2x-filter**<br/>
936
979
  Purpose: **meta information based filter**<br/>
937
- Example: `filter(type: "audio", var: "meta:gender", op: "==", val: "male")`<br/>
980
+ Example: `x2x-filter(type: "audio", var: "meta:gender", op: "==", val: "male")`<br/>
938
981
 
939
982
  > This node allows you to filter nodes based on certain criteria. It
940
- > is primarily intended to be used in conjunction with the "gender"
983
+ > is primarily intended to be used in conjunction with the "a2a-gender"
941
984
  > node and in front of the `t2a-elevenlabs` or `t2a-kokoro` nodes in order to
942
985
  > translate with a corresponding voice.
943
986
 
@@ -954,9 +997,9 @@ The following nodes process any type of chunk, i.e., both audio and text chunks.
954
997
  | **op** | 3 | "==" | `/^(?:<\|<=\|==\|!=\|~~\|!~\|>=\|>)$/` |
955
998
  | **val** | 4 | "" | `/^.*$/` |
956
999
 
957
- - Node: **trace**<br/>
1000
+ - Node: **x2x-trace**<br/>
958
1001
  Purpose: **data flow tracing**<br/>
959
- Example: `trace(type: "audio")`<br/>
1002
+ Example: `x2x-trace(type: "audio")`<br/>
960
1003
 
961
1004
  > This node allows you to trace the audio and text chunk flow through
962
1005
  > the **SpeechFlow** graph. It just passes through its chunks, but
@@ -978,33 +1021,33 @@ REST/WebSocket API
978
1021
  **SpeechFlow** has an externally exposed REST/WebSockets API which can
979
1022
  be used to control the nodes and to receive information from nodes.
980
1023
  For controlling a node you have three possibilities (illustrated by
981
- controlling the mode of the "mute" node):
1024
+ controlling the mode of the "a2a-mute" node):
982
1025
 
983
1026
  ```sh
984
1027
  # use HTTP/REST/GET:
985
- $ curl http://127.0.0.1:8484/api/COMMAND/mute/mode/silenced
1028
+ $ curl http://127.0.0.1:8484/api/COMMAND/a2a-mute/mode/silenced
986
1029
  ```
987
1030
 
988
1031
  ```sh
989
1032
  # use HTTP/REST/POST:
990
1033
  $ curl -H "Content-type: application/json" \
991
- --data '{ "request": "COMMAND", "node": "mute", "args": [ "mode", "silenced" ] }' \
1034
+ --data '{ "request": "COMMAND", "node": "a2a-mute", "args": [ "mode", "silenced" ] }' \
992
1035
  http://127.0.0.1:8484/api
993
1036
  ```
994
1037
 
995
1038
  ```sh
996
1039
  # use WebSockets:
997
1040
  $ wscat -c ws://127.0.0.1:8484/api \
998
- > { "request": "COMMAND", "node": "mute", "args": [ "mode", "silenced" ] }
1041
+ > { "request": "COMMAND", "node": "a2a-mute", "args": [ "mode", "silenced" ] }
999
1042
  ```
1000
1043
 
1001
1044
  For receiving emitted information from nodes, you have to use the WebSockets
1002
- API (illustrated by the emitted information of the "meter" node):
1045
+ API (illustrated by the emitted information of the "a2a-meter" node):
1003
1046
 
1004
1047
  ```sh
1005
1048
  # use WebSockets:
1006
1049
  $ wscat -c ws://127.0.0.1:8484/api \
1007
- < { "response": "NOTIFY", "node": "meter", "args": [ "meter", "LUFS-S", -35.75127410888672 ] }
1050
+ < { "response": "NOTIFY", "node": "a2a-meter", "args": [ "meter", "LUFS-S", -35.75127410888672 ] }
1008
1051
  ```
1009
1052
 
1010
1053
  History