speechflow 1.5.1 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +192 -171
  3. package/etc/claude.md +83 -46
  4. package/etc/speechflow.yaml +84 -84
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-api.d.ts +12 -0
  7. package/speechflow-cli/dst/speechflow-main-api.js +319 -0
  8. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -0
  9. package/speechflow-cli/dst/speechflow-main-cli.d.ts +28 -0
  10. package/speechflow-cli/dst/speechflow-main-cli.js +271 -0
  11. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -0
  12. package/speechflow-cli/dst/speechflow-main-config.d.ts +9 -0
  13. package/speechflow-cli/dst/speechflow-main-config.js +27 -0
  14. package/speechflow-cli/dst/speechflow-main-config.js.map +1 -0
  15. package/speechflow-cli/dst/speechflow-main-graph.d.ts +34 -0
  16. package/speechflow-cli/dst/speechflow-main-graph.js +367 -0
  17. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -0
  18. package/speechflow-cli/dst/speechflow-main-nodes.d.ts +10 -0
  19. package/speechflow-cli/dst/speechflow-main-nodes.js +60 -0
  20. package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -0
  21. package/speechflow-cli/dst/speechflow-main-status.d.ts +11 -0
  22. package/speechflow-cli/dst/speechflow-main-status.js +60 -0
  23. package/speechflow-cli/dst/speechflow-main-status.js.map +1 -0
  24. package/speechflow-cli/dst/speechflow-main.d.ts +7 -0
  25. package/speechflow-cli/dst/speechflow-main.js +127 -0
  26. package/speechflow-cli/dst/speechflow-main.js.map +1 -0
  27. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +4 -4
  28. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +1 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +8 -9
  31. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +5 -5
  33. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +1 -1
  35. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
  36. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  37. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.d.ts +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +8 -8
  39. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  40. package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +6 -6
  42. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +1 -1
  44. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +5 -5
  45. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +7 -7
  48. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
  50. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +5 -5
  51. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  52. package/speechflow-cli/dst/speechflow-node-a2a-mute.d.ts +1 -1
  53. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +3 -3
  54. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +1 -1
  56. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +7 -7
  57. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +1 -1
  59. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +7 -7
  60. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-a2a-vad.d.ts +1 -1
  62. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +7 -7
  63. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  64. package/speechflow-cli/dst/speechflow-node-a2a-wav.d.ts +1 -1
  65. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +3 -3
  66. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  67. package/speechflow-cli/dst/{speechflow-node-a2t-awstranscribe.d.ts → speechflow-node-a2t-amazon.d.ts} +1 -1
  68. package/speechflow-cli/dst/{speechflow-node-a2t-awstranscribe.js → speechflow-node-a2t-amazon.js} +11 -11
  69. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -0
  70. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +1 -1
  71. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +7 -7
  72. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  73. package/speechflow-cli/dst/{speechflow-node-a2t-openaitranscribe.d.ts → speechflow-node-a2t-openai.d.ts} +1 -1
  74. package/speechflow-cli/dst/{speechflow-node-a2t-openaitranscribe.js → speechflow-node-a2t-openai.js} +11 -11
  75. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -0
  76. package/speechflow-cli/dst/{speechflow-node-t2a-awspolly.d.ts → speechflow-node-t2a-amazon.d.ts} +1 -1
  77. package/speechflow-cli/dst/{speechflow-node-t2a-awspolly.js → speechflow-node-t2a-amazon.js} +9 -9
  78. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -0
  79. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  80. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -5
  81. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
  83. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +7 -7
  84. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  85. package/speechflow-cli/dst/{speechflow-node-t2t-awstranslate.d.ts → speechflow-node-t2t-amazon.d.ts} +1 -1
  86. package/speechflow-cli/dst/{speechflow-node-t2t-awstranslate.js → speechflow-node-t2t-amazon.js} +7 -7
  87. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -0
  88. package/speechflow-cli/dst/speechflow-node-t2t-deepl.d.ts +1 -1
  89. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +5 -5
  90. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  91. package/speechflow-cli/dst/speechflow-node-t2t-format.d.ts +1 -1
  92. package/speechflow-cli/dst/speechflow-node-t2t-format.js +3 -3
  93. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  94. package/speechflow-cli/dst/speechflow-node-t2t-google.d.ts +1 -1
  95. package/speechflow-cli/dst/speechflow-node-t2t-google.js +8 -8
  96. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  97. package/speechflow-cli/dst/{speechflow-node-a2a-dynamics.d.ts → speechflow-node-t2t-modify.d.ts} +1 -5
  98. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +111 -0
  99. package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -0
  100. package/speechflow-cli/dst/speechflow-node-t2t-ollama.d.ts +1 -1
  101. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +5 -5
  102. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  103. package/speechflow-cli/dst/speechflow-node-t2t-openai.d.ts +1 -1
  104. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +5 -5
  105. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  106. package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -1
  107. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +5 -5
  108. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  109. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.d.ts +1 -1
  110. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +5 -5
  111. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  112. package/speechflow-cli/dst/speechflow-node-t2t-transformers.d.ts +1 -1
  113. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +5 -5
  114. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
  115. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -1
  116. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +5 -5
  117. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  118. package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -1
  119. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +3 -3
  120. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  121. package/speechflow-cli/dst/speechflow-node-xio-device.d.ts +1 -1
  122. package/speechflow-cli/dst/speechflow-node-xio-device.js +8 -8
  123. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  124. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -1
  125. package/speechflow-cli/dst/speechflow-node-xio-file.js +50 -29
  126. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  127. package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -1
  128. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +7 -7
  129. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  130. package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -1
  131. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +10 -10
  132. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  133. package/speechflow-cli/dst/{speechflow-utils-audio-wt.js → speechflow-util-audio-wt.js} +1 -1
  134. package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -0
  135. package/speechflow-cli/dst/speechflow-util-audio.d.ts +22 -0
  136. package/speechflow-cli/dst/speechflow-util-audio.js +251 -0
  137. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -0
  138. package/speechflow-cli/dst/speechflow-util-error.d.ts +14 -0
  139. package/speechflow-cli/dst/speechflow-util-error.js +131 -0
  140. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -0
  141. package/speechflow-cli/dst/speechflow-util-queue.d.ts +68 -0
  142. package/speechflow-cli/dst/speechflow-util-queue.js +338 -0
  143. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -0
  144. package/speechflow-cli/dst/speechflow-util-stream.d.ts +18 -0
  145. package/speechflow-cli/dst/speechflow-util-stream.js +219 -0
  146. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -0
  147. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js +124 -0
  148. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js.map +1 -0
  149. package/speechflow-cli/dst/{speechflow-utils-audio.js → speechflow-util-webaudio.js} +2 -2
  150. package/speechflow-cli/dst/speechflow-util-webaudio.js.map +1 -0
  151. package/speechflow-cli/dst/speechflow-util.d.ts +4 -0
  152. package/speechflow-cli/dst/speechflow-util.js +26 -0
  153. package/speechflow-cli/dst/speechflow-util.js.map +1 -0
  154. package/speechflow-cli/dst/speechflow.js +3 -906
  155. package/speechflow-cli/dst/speechflow.js.map +1 -1
  156. package/speechflow-cli/etc/oxlint.jsonc +4 -1
  157. package/speechflow-cli/package.json +12 -11
  158. package/speechflow-cli/src/speechflow-main-api.ts +315 -0
  159. package/speechflow-cli/src/speechflow-main-cli.ts +259 -0
  160. package/speechflow-cli/src/speechflow-main-config.ts +17 -0
  161. package/speechflow-cli/src/speechflow-main-graph.ts +372 -0
  162. package/speechflow-cli/src/speechflow-main-nodes.ts +61 -0
  163. package/speechflow-cli/src/speechflow-main-status.ts +70 -0
  164. package/speechflow-cli/src/speechflow-main.ts +106 -0
  165. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -4
  166. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +7 -8
  167. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +5 -5
  168. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +7 -8
  169. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +7 -7
  170. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +6 -6
  171. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +4 -4
  172. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +6 -6
  173. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +4 -4
  174. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +2 -2
  175. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +6 -6
  176. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +6 -6
  177. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +6 -6
  178. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +2 -2
  179. package/speechflow-cli/src/{speechflow-node-a2t-awstranscribe.ts → speechflow-node-a2t-amazon.ts} +10 -10
  180. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +7 -7
  181. package/speechflow-cli/src/{speechflow-node-a2t-openaitranscribe.ts → speechflow-node-a2t-openai.ts} +10 -10
  182. package/speechflow-cli/src/{speechflow-node-t2a-awspolly.ts → speechflow-node-t2a-amazon.ts} +7 -7
  183. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +4 -4
  184. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +6 -6
  185. package/speechflow-cli/src/{speechflow-node-t2t-awstranslate.ts → speechflow-node-t2t-amazon.ts} +5 -5
  186. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +4 -4
  187. package/speechflow-cli/src/speechflow-node-t2t-format.ts +2 -2
  188. package/speechflow-cli/src/speechflow-node-t2t-google.ts +7 -7
  189. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +84 -0
  190. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +4 -4
  191. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +4 -4
  192. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +4 -4
  193. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +9 -9
  194. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +4 -4
  195. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -4
  196. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +2 -2
  197. package/speechflow-cli/src/speechflow-node-xio-device.ts +7 -7
  198. package/speechflow-cli/src/speechflow-node-xio-file.ts +49 -28
  199. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +7 -7
  200. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
  201. package/speechflow-cli/src/{speechflow-utils-audio.ts → speechflow-util-audio.ts} +131 -1
  202. package/speechflow-cli/src/speechflow-util-error.ts +184 -0
  203. package/speechflow-cli/src/speechflow-util-queue.ts +320 -0
  204. package/speechflow-cli/src/speechflow-util-stream.ts +197 -0
  205. package/speechflow-cli/src/speechflow-util.ts +10 -0
  206. package/speechflow-cli/src/speechflow.ts +3 -947
  207. package/speechflow-ui-db/package.json +3 -3
  208. package/speechflow-ui-st/dst/app-font-fa-brands-400.woff2 +0 -0
  209. package/speechflow-ui-st/dst/app-font-fa-regular-400.woff2 +0 -0
  210. package/speechflow-ui-st/dst/app-font-fa-solid-900.woff2 +0 -0
  211. package/speechflow-ui-st/dst/app-font-fa-v4compatibility.woff2 +0 -0
  212. package/speechflow-ui-st/dst/index.css +2 -2
  213. package/speechflow-ui-st/dst/index.js +32 -33
  214. package/speechflow-ui-st/package.json +4 -4
  215. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +0 -208
  216. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +0 -1
  217. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +0 -312
  218. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +0 -1
  219. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +0 -1
  220. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +0 -1
  221. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +0 -1
  222. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +0 -1
  223. package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +0 -1
  224. package/speechflow-cli/dst/speechflow-utils-audio.js.map +0 -1
  225. package/speechflow-cli/dst/speechflow-utils.d.ts +0 -108
  226. package/speechflow-cli/dst/speechflow-utils.js +0 -740
  227. package/speechflow-cli/dst/speechflow-utils.js.map +0 -1
  228. package/speechflow-cli/src/speechflow-utils.ts +0 -804
  229. /package/speechflow-cli/dst/{speechflow-node-a2a-dynamics-wt.d.ts → speechflow-util-audio-wt.d.ts} +0 -0
  230. /package/speechflow-cli/dst/{speechflow-utils-audio-wt.d.ts → speechflow-util-webaudio-wt.d.ts} +0 -0
  231. /package/speechflow-cli/dst/{speechflow-utils-audio.d.ts → speechflow-util-webaudio.d.ts} +0 -0
  232. /package/speechflow-cli/src/{speechflow-utils-audio-wt.ts → speechflow-util-audio-wt.ts} +0 -0
package/etc/claude.md CHANGED
@@ -1,70 +1,107 @@
1
-
2
1
  # CLAUDE.md
3
2
 
4
- This file provides guidance to Claude Code (claude.ai/code) when working
5
- with code in this repository.
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
6
4
 
7
5
  ## Project Overview
8
6
 
9
- SpeechFlow is a command-line interface tool for establishing directed
10
- data flow graphs of audio and text processing nodes. It enables flexible
11
- speech processing tasks including capturing audio, text-to-speech,
12
- speech-to-text, and speech-to-speech translation.
7
+ SpeechFlow is a command-line interface tool for establishing directed data flow graphs of audio and text processing nodes. It enables flexible speech processing tasks including capturing audio, text-to-speech, speech-to-text, and speech-to-speech translation.
13
8
 
14
9
  ## Architecture
15
10
 
16
- SpeechFlow uses a modular node-based architecture:
11
+ SpeechFlow uses a modular node-based architecture with three main components:
17
12
 
18
- - **Core Engine**: TypeScript-based CLI tool that orchestrates processing flows
19
- - **Processing Nodes**: Modular components for different speech processing tasks (see `src/speechflow-node-*.ts`)
20
- - **Flow Expression Language**: Based on FlowLink for defining processing graphs
21
- - **Web Interfaces**: Two Vue.js applications for dashboard and subtitle display
22
- - **REST/WebSocket API**: External control interface for nodes
13
+ - **speechflow-cli**: Core TypeScript-based CLI engine that orchestrates processing flows
14
+ - **speechflow-ui-db**: Dashboard UI component for real-time visualization
15
+ - **speechflow-ui-st**: Subtitle UI component for displaying live subtitles
23
16
 
24
- ### Key Components
17
+ ### Processing Node Categories
25
18
 
26
- - **Main CLI**:
27
- `src/speechflow.ts` - Entry point and CLI parsing
28
- - **Nodes**:
29
- - Input/Output: `file`, `device`, `websocket`, `mqtt`
30
- - Audio-to-Audio: `ffmpeg`, `wav`, `mute`, `meter`, `vad`, `gender`
31
- - Audio-to-Text: `deepgram`
32
- - Text-to-Text: `deepl`, `openai`, `ollama`, `transformers`, `subtitle`, `format`, `sentence`
33
- - Text-to-Audio: `elevenlabs`, `kokoro`
34
- - Any-to-Any: `filter`, `trace`
19
+ - **Input/Output (xio)**: file, device, websocket, mqtt
20
+ - **Audio-to-Audio (a2a)**: ffmpeg, wav, mute, meter, vad, gender, gain, filler, compressor, expander, rnnoise, speex
21
+ - **Audio-to-Text (a2t)**: deepgram, amazon, openai
22
+ - **Text-to-Text (t2t)**: deepl, google, amazon, openai, ollama, transformers, subtitle, format, sentence, modify
23
+ - **Text-to-Audio (t2a)**: elevenlabs, kokoro, amazon
24
+ - **Any-to-Any (x2x)**: filter, trace
35
25
 
36
26
  ## Development Commands
37
27
 
38
- The project uses STX (Simple Task eXecutor) for build automation. Main commands:
39
-
40
- ### Core Project
41
-
42
28
  ```bash
43
- npm start lint # Static code analysis (TypeScript, ESLint, Biome, Oxlint)
44
- npm start build # Compile TypeScript to JavaScript in dst/
45
- npm start dev # Multi-pane development dashboard with linting, building, and server
46
- npm start server # Run the main speechflow program
29
+ # Top-level commands (from root directory)
30
+ npm start lint # Lint all components (TypeScript, ESLint, Biome, Oxlint)
31
+ npm start build # Build all components (full production build)
47
32
  npm start clean # Remove generated files
33
+ npm start upd # Update all NPM dependencies
34
+
35
+ # Component-specific development (from speechflow-cli/)
36
+ npm start dev # Multi-pane dashboard with linting, building, and server
37
+ npm start lint # Static code analysis
38
+ npm start build # Compile TypeScript to JavaScript
39
+ npm start server # Run the main speechflow program
40
+ npm start clean # Clean generated files
41
+
42
+ # Testing
43
+ npm start test # Run test configuration with sample pipeline
48
44
  ```
49
45
 
50
- ## Project Structure
46
+ ## Key Implementation Files
47
+
48
+ ### Core Engine
49
+ - `speechflow-cli/src/speechflow.ts` - Main CLI entry point and orchestration
50
+ - `speechflow-cli/src/speechflow-node.ts` - Base node class with stream processing
51
+ - `speechflow-cli/src/speechflow-util.ts` - Utility functions and helpers
52
+
53
+ ### Node Implementations
54
+ All node implementations follow the pattern `speechflow-node-{category}-{name}.ts` in `speechflow-cli/src/`.
51
55
 
52
- - `src/` - Main TypeScript source files
53
- - `dst/` - Compiled JavaScript output
54
- - `etc/` - Configuration files (TypeScript, ESLint, Biome, etc.)
55
- - `package.d/` - NPM package patches
56
+ ### Stream Processing Architecture
57
+ - Uses Node.js object-mode streams with timestamp metadata
58
+ - Audio chunks: PCM format, 16-bit, 16kHz, mono
59
+ - Text chunks: Include timing information and metadata (gender, final/interim)
60
+ - All streams maintain chronological timestamps for synchronization
56
61
 
57
- ## Development Notes
62
+ ## API Integration
58
63
 
59
- - Node.js 22+ required
60
- - Uses object-mode streaming with timestamps for audio/text processing
61
- - External services integration: Deepgram, ElevenLabs, DeepL, OpenAI, Ollama
62
- - Supports local processing: FFmpeg, WAV, Voice Activity Detection, Gender Detection
63
- - REST/WebSocket API on port 8484 for external control
64
+ REST/WebSocket API available on port 8484 (configurable) for:
65
+ - External node control (muting, configuration)
66
+ - Real-time metrics (audio levels, text flow)
67
+ - Dashboard and UI connectivity
68
+
69
+ ## Environment Configuration
70
+
71
+ Key environment variables for service integrations:
72
+ - `SPEECHFLOW_DEEPGRAM_KEY` - Deepgram API key
73
+ - `SPEECHFLOW_ELEVENLABS_KEY` - ElevenLabs API key
74
+ - `SPEECHFLOW_DEEPL_KEY` - DeepL API key
75
+ - `SPEECHFLOW_OPENAI_KEY` - OpenAI API key
76
+ - `SPEECHFLOW_GOOGLE_KEY` - Google Cloud API key
77
+ - `SPEECHFLOW_AWS_ACCESS_KEY_ID` - AWS access key
78
+ - `SPEECHFLOW_AWS_SECRET_ACCESS_KEY` - AWS secret key
79
+ - `SPEECHFLOW_AWS_REGION` - AWS region
80
+ - `SPEECHFLOW_DEVICE_MIC` - Microphone device identifier
81
+ - `SPEECHFLOW_DEVICE_SPK` - Speaker device identifier
82
+
83
+ ## Flow Expression Language
84
+
85
+ Based on FlowLink with support for:
86
+ - Sequential pipelines: `node1 | node2 | node3`
87
+ - Parallel branches: `node1, node2, node3`
88
+ - Grouping: `{ node1 | node2 }`
89
+ - Parameters: `node(param1: value, param2: "string")`
90
+ - Environment variables: `env.VARIABLE_NAME`
91
+ - Command arguments: `argv.0`, `argv.1`
92
+
93
+ ## Testing Approach
94
+
95
+ Run tests using the test configuration:
96
+ ```bash
97
+ npm start test
98
+ ```
64
99
 
65
- ## Configuration
100
+ This executes a sample pipeline defined in `etc/speechflow.yaml` with dashboard visualization.
66
101
 
67
- Main configuration in `etc/speechflow.yaml` with example
68
- processing graphs. Environment variables used for API keys (e.g.,
69
- `SPEECHFLOW_DEEPGRAM_KEY`, `SPEECHFLOW_ELEVENLABS_KEY`).
102
+ ## Important Patterns
70
103
 
104
+ 1. **Stream Processing**: All nodes extend `SpeechFlowNode` and implement `process()` method for stream transformation
105
+ 2. **Error Handling**: Nodes emit errors via stream events, captured and logged centrally
106
+ 3. **Timestamp Preservation**: Audio/text chunks maintain timing for synchronization across pipeline
107
+ 4. **Meta Information**: Chunks carry metadata (gender, final/interim status) for downstream filtering
package/etc/speechflow.yaml CHANGED
@@ -4,74 +4,74 @@
4
4
 
5
5
  # Capture and meter audio from microphone device into WAV audio file
6
6
  capturing: |
7
- device(device: "coreaudio:Elgato Wave:3", mode: "r") |
8
- vad() |
9
- meter(1000) |
10
- wav(mode: "encode") |
11
- file(path: "capture.wav", mode: "w", type: "audio")
7
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") |
8
+ a2a-vad() |
9
+ a2a-meter(1000) |
10
+ a2a-wav(mode: "encode") |
11
+ xio-file(path: "capture.wav", mode: "w", type: "audio")
12
12
 
13
13
  # Pass-through audio from microphone device to speaker
14
14
  # device and in parallel record it to WAV audio file
15
15
  pass-through: |
16
- device(device: "wasapi:VoiceMeeter Out B1", mode: "r") | {
17
- wav(mode: "encode") |
18
- file(path: "capture.wav", mode: "w", type: "audio"),
19
- device(device: "wasapi:VoiceMeeter VAIO3 Input", mode: "w")
16
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") | {
17
+ a2a-wav(mode: "encode") |
18
+ xio-file(path: "capture.wav", mode: "w", type: "audio"),
19
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
20
20
  }
21
21
 
22
22
  # Generate text file with German transcription of German MP3 audio file
23
23
  transcription: |
24
- file(path: argv.0, mode: "r", type: "audio") |
25
- ffmpeg(src: "mp3", dst: "pcm") |
26
- deepgram(language: "de") |
27
- format(width: 80) |
28
- file(path: argv.1, mode: "w", type: "text")
24
+ xio-file(path: argv.0, mode: "r", type: "audio") |
25
+ a2a-ffmpeg(src: "mp3", dst: "pcm") |
26
+ a2t-deepgram(language: "de") |
27
+ t2t-format(width: 80) |
28
+ xio-file(path: argv.1, mode: "w", type: "text")
29
29
 
30
30
  # Generate WebVTT file with German closed captions of German MP3 audio file
31
31
  captioning: |
32
- file(path: argv.0, mode: "r", type: "audio") |
33
- ffmpeg(src: "mp3", dst: "pcm") |
34
- deepgram(language: "de") |
35
- subtitle(format: "vtt") |
36
- file(path: argv.1, mode: "w", type: "text")
32
+ xio-file(path: argv.0, mode: "r", type: "audio") |
33
+ a2a-ffmpeg(src: "mp3", dst: "pcm") |
34
+ a2t-deepgram(language: "de") |
35
+ t2t-subtitle(format: "vtt") |
36
+ xio-file(path: argv.1, mode: "w", type: "text")
37
37
 
38
38
  # Generate WebVTT file with English subtitles of German MP3 audio file
39
39
  subtitling: |
40
- file(path: argv.0, mode: "r", type: "audio") |
41
- ffmpeg(src: "mp3", dst: "pcm") |
42
- deepgram(language: "de") |
43
- deepl(src: "de", dst: "en") |
44
- subtitle(format: "vtt") |
45
- file(path: argv.1, mode: "w", type: "text")
40
+ xio-file(path: argv.0, mode: "r", type: "audio") |
41
+ a2a-ffmpeg(src: "mp3", dst: "pcm") |
42
+ a2t-deepgram(language: "de") |
43
+ t2t-deepl(src: "de", dst: "en") |
44
+ t2t-subtitle(format: "vtt") |
45
+ xio-file(path: argv.1, mode: "w", type: "text")
46
46
 
47
47
  # Ad-Hoc text translation from German to English
48
48
  translation: |
49
- file(path: "-", mode: "r", type: "text") |
50
- deepl(src: "de", dst: "en") |
51
- file(path: "-", mode: "w", type: "text")
49
+ xio-file(path: "-", mode: "r", type: "text") |
50
+ t2t-deepl(src: "de", dst: "en") |
51
+ xio-file(path: "-", mode: "w", type: "text")
52
52
 
53
53
  # Generate audio file with English voice for a text file
54
54
  speaking: |
55
- file(path: argv.0, mode: "r", type: "text") |
56
- kokoro(language: "en") |
57
- wav(mode: "encode") |
58
- file(path: argv.1, mode: "w", type: "audio")
55
+ xio-file(path: argv.0, mode: "r", type: "text") |
56
+ t2a-kokoro(language: "en") |
57
+ a2a-wav(mode: "encode") |
58
+ xio-file(path: argv.1, mode: "w", type: "audio")
59
59
 
60
60
  # Batch studio transcription from German to English,
61
61
  # including the capturing of all involved inputs and outputs:
62
62
  studio-transcription: |
63
- file(path: argv.0, mode: "r", type: "audio") | {
64
- ffmpeg(src: "mp3", dst: "pcm") | {
65
- deepgram(language: "de") | {
66
- format(width: 80) |
67
- file(path: argv.1, mode: "w", type: "text"),
68
- subtitle(format: "vtt") |
69
- file(path: argv.2, mode: "w", type: "text"),
70
- subtitle(format: "srt") |
71
- file(path: argv.3, mode: "w", type: "text"),
72
- elevenlabs(voice: "Mark", optimize: "quality", speed: 1.05, language: "en") |
73
- wav(mode: "encode") |
74
- file(path: argv.4, mode: "w", type: "audio")
63
+ xio-file(path: argv.0, mode: "r", type: "audio") | {
64
+ a2a-ffmpeg(src: "mp3", dst: "pcm") | {
65
+ a2t-deepgram(language: "de") | {
66
+ t2t-format(width: 80) |
67
+ xio-file(path: argv.1, mode: "w", type: "text"),
68
+ t2t-subtitle(format: "vtt") |
69
+ xio-file(path: argv.2, mode: "w", type: "text"),
70
+ t2t-subtitle(format: "srt") |
71
+ xio-file(path: argv.3, mode: "w", type: "text"),
72
+ t2a-elevenlabs(voice: "Mark", optimize: "quality", speed: 1.05, language: "en") |
73
+ a2a-wav(mode: "encode") |
74
+ xio-file(path: argv.4, mode: "w", type: "audio")
75
75
  }
76
76
  }
77
77
  }
@@ -79,35 +79,35 @@ studio-transcription: |
79
79
  # Real-time studio translation from German to English,
80
80
  # including the capturing of all involved inputs and outputs:
81
81
  studio-translation: |
82
- device(device: "coreaudio:Elgato Wave:3", mode: "r") | {
83
- gender() | {
84
- meter(interval: 250, dashboard: "meter1") |
85
- wav(mode: "encode") |
86
- file(path: "program-de.wav", mode: "w", type: "audio"),
87
- deepgram(language: "de", key: interim: true) | {
88
- trace(name: "trace1", type: "text", dashboard: "text1")
89
- subtitle(format: "vtt", words: true) |
90
- file(path: "program-de.vtt", mode: "w", type: "text"),
91
- sentence() | {
92
- trace(name: "trace2", type: "text", notify: true, dashboard: "text2") |
93
- format(width: 80) |
94
- file(path: "program-de.txt", mode: "w", type: "text"),
95
- deepl(src: "de", dst: "en") | {
96
- trace(name: "trace3", type: "text", dashboard: "text3") | {
97
- format(width: 80) |
98
- file(path: "program-en.txt", mode: "w", type: "text"),
99
- subtitle(format: "vtt", words: false) |
100
- file(path: "program-en.vtt", mode: "w", type: "text"),
82
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") | {
83
+ a2a-gender() | {
84
+ a2a-meter(interval: 250, dashboard: "meter1") |
85
+ a2a-wav(mode: "encode") |
86
+ xio-file(path: "program-de.wav", mode: "w", type: "audio"),
87
+ a2t-deepgram(language: "de", key: interim: true) | {
88
+ x2x-trace(name: "trace1", type: "text", dashboard: "text1")
89
+ t2t-subtitle(format: "vtt", words: true) |
90
+ xio-file(path: "program-de.vtt", mode: "w", type: "text"),
91
+ t2t-sentence() | {
92
+ x2x-trace(name: "trace2", type: "text", notify: true, dashboard: "text2") |
93
+ t2t-format(width: 80) |
94
+ xio-file(path: "program-de.txt", mode: "w", type: "text"),
95
+ t2t-deepl(src: "de", dst: "en") | {
96
+ x2x-trace(name: "trace3", type: "text", dashboard: "text3") | {
97
+ t2t-format(width: 80) |
98
+ xio-file(path: "program-en.txt", mode: "w", type: "text"),
99
+ t2t-subtitle(format: "vtt", words: false) |
100
+ xio-file(path: "program-en.vtt", mode: "w", type: "text"),
101
101
  {
102
- filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
103
- elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
104
- filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
105
- elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
102
+ x2x-filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
103
+ t2a-elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
104
+ x2x-filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
105
+ t2a-elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
106
106
  } | {
107
- meter(interval: 250, dashboard: "meter2"),
108
- wav(mode: "encode") |
109
- file(path: "program-en.wav", mode: "w", type: "audio"),
110
- device(device: "coreaudio:USBAudio2.0", mode: "w")
107
+ a2a-meter(interval: 250, dashboard: "meter2"),
108
+ a2a-wav(mode: "encode") |
109
+ xio-file(path: "program-en.wav", mode: "w", type: "audio"),
110
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
111
111
  }
112
112
  }
113
113
  }
@@ -118,18 +118,18 @@ studio-translation: |
118
118
 
119
119
  # Test-drive for development
120
120
  test: |
121
- device(device: "coreaudio:Elgato Wave:3", mode: "r") |
122
- meter(interval: 50, dashboard: "meter1") |
123
- deepgram(language: "de", model: "nova-2", interim: true) |
124
- trace(type: "text", dashboard: "text1") | {
125
- subtitle(mode: "render", addr: "127.0.0.1", port: 8585),
126
- filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
127
- sentence() |
128
- trace(type: "text", dashboard: "text2") |
129
- deepl(src: "de", dst: "en") |
130
- trace(type: "text", dashboard: "text3") |
131
- elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
132
- meter(interval: 50, dashboard: "meter2") |
133
- device(device: "coreaudio:USBAudio2.0", mode: "w")
121
+ xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") |
122
+ a2a-meter(interval: 50, dashboard: "meter1") |
123
+ a2t-deepgram(language: "de", model: "nova-2", interim: true) |
124
+ x2x-trace(type: "text", dashboard: "text1") | {
125
+ t2t-subtitle(mode: "render", addr: "127.0.0.1", port: 8585),
126
+ x2x-filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
127
+ t2t-sentence() |
128
+ x2x-trace(type: "text", dashboard: "text2") |
129
+ t2t-deepl(src: "de", dst: "en") |
130
+ x2x-trace(type: "text", dashboard: "text3") |
131
+ t2a-elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
132
+ a2a-meter(interval: 50, dashboard: "meter2") |
133
+ xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
134
134
  }
135
135
 
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "speechflow",
3
- "version": "1.5.1",
4
- "x-stdver": "1.5.1-GA",
5
- "x-release": "2025-09-02",
3
+ "version": "1.6.1",
4
+ "x-stdver": "1.6.1-GA",
5
+ "x-release": "2025-09-06",
6
6
  "homepage": "https://github.com/rse/speechflow",
7
7
  "description": "Speech Processing Flow Graph",
8
8
  "keywords": [ "speech", "audio", "flow", "graph" ],
package/speechflow-cli/dst/speechflow-main-api.d.ts ADDED
@@ -0,0 +1,12 @@
1
+ import CLIio from "cli-io";
2
+ import { CLIOptions } from "./speechflow-main-cli";
3
+ import { NodeGraph } from "./speechflow-main-graph";
4
+ export declare class APIServer {
5
+ private cli;
6
+ private wsPeers;
7
+ private hapi;
8
+ private sendOSC;
9
+ constructor(cli: CLIio);
10
+ start(args: CLIOptions, graph: NodeGraph): Promise<void>;
11
+ stop(args: CLIOptions): Promise<void>;
12
+ }