speechflow 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +242 -7
  3. package/etc/claude.md +70 -0
  4. package/etc/speechflow.yaml +13 -11
  5. package/etc/stx.conf +7 -0
  6. package/package.json +7 -6
  7. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
  8. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
  9. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
  10. package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
  12. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
  13. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
  14. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
  15. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
  16. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
  17. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
  18. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
  19. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
  20. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
  21. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
  22. package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
  23. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
  24. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
  25. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +3 -3
  26. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
  28. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
  29. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
  30. package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
  31. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
  32. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
  33. package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
  35. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -0
  37. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +12 -8
  38. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  39. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
  40. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
  42. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
  43. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
  45. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
  46. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
  47. package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
  48. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
  49. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
  50. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
  51. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  52. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
  53. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  54. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
  55. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +312 -0
  56. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
  57. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +16 -14
  58. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  59. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
  60. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
  61. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
  62. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
  63. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +204 -0
  64. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
  65. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
  66. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +47 -8
  68. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
  70. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +175 -0
  71. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
  72. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +14 -15
  73. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  74. package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
  75. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  76. package/speechflow-cli/dst/speechflow-node-t2t-google.d.ts +13 -0
  77. package/speechflow-cli/dst/speechflow-node-t2t-google.js +153 -0
  78. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -0
  79. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +80 -33
  80. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  81. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +78 -45
  82. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  83. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
  84. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  85. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +13 -14
  86. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  87. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +23 -27
  88. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
  89. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
  90. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
  91. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  92. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
  93. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  94. package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
  95. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
  97. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
  98. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  99. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
  100. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  101. package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
  102. package/speechflow-cli/dst/speechflow-node.js +13 -2
  103. package/speechflow-cli/dst/speechflow-node.js.map +1 -1
  104. package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
  105. package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
  106. package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
  107. package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
  108. package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
  109. package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
  110. package/speechflow-cli/dst/speechflow-utils.d.ts +34 -0
  111. package/speechflow-cli/dst/speechflow-utils.js +256 -35
  112. package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
  113. package/speechflow-cli/dst/speechflow.js +75 -26
  114. package/speechflow-cli/dst/speechflow.js.map +1 -1
  115. package/speechflow-cli/etc/biome.jsonc +2 -1
  116. package/speechflow-cli/etc/oxlint.jsonc +113 -11
  117. package/speechflow-cli/etc/stx.conf +2 -2
  118. package/speechflow-cli/etc/tsconfig.json +1 -1
  119. package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
  120. package/speechflow-cli/package.json +103 -94
  121. package/speechflow-cli/src/lib.d.ts +24 -0
  122. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
  123. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
  124. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
  125. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
  126. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +3 -3
  127. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
  128. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
  129. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
  130. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +13 -9
  131. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
  132. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
  133. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
  134. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
  135. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
  136. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
  137. package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +306 -0
  138. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +17 -15
  139. package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
  140. package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
  141. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
  142. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +15 -9
  143. package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +153 -0
  144. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +14 -15
  145. package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
  146. package/speechflow-cli/src/speechflow-node-t2t-google.ts +133 -0
  147. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +58 -44
  148. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +59 -58
  149. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
  150. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +18 -18
  151. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +28 -32
  152. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
  153. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
  154. package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
  155. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
  156. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
  157. package/speechflow-cli/src/speechflow-node.ts +21 -8
  158. package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
  159. package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
  160. package/speechflow-cli/src/speechflow-utils.ts +314 -32
  161. package/speechflow-cli/src/speechflow.ts +84 -33
  162. package/speechflow-ui-db/dst/app-font-fa-brands-400.woff2 +0 -0
  163. package/speechflow-ui-db/dst/app-font-fa-regular-400.woff2 +0 -0
  164. package/speechflow-ui-db/dst/app-font-fa-solid-900.woff2 +0 -0
  165. package/speechflow-ui-db/dst/app-font-fa-v4compatibility.woff2 +0 -0
  166. package/speechflow-ui-db/dst/index.css +2 -2
  167. package/speechflow-ui-db/dst/index.js +37 -38
  168. package/speechflow-ui-db/etc/eslint.mjs +0 -1
  169. package/speechflow-ui-db/etc/tsc-client.json +3 -3
  170. package/speechflow-ui-db/package.json +12 -11
  171. package/speechflow-ui-db/src/app.vue +20 -6
  172. package/speechflow-ui-st/dst/index.js +26 -26
  173. package/speechflow-ui-st/etc/eslint.mjs +0 -1
  174. package/speechflow-ui-st/etc/tsc-client.json +3 -3
  175. package/speechflow-ui-st/package.json +12 -11
  176. package/speechflow-ui-st/src/app.vue +5 -12
package/CHANGELOG.md CHANGED
@@ -2,6 +2,41 @@
2
2
  ChangeLog
3
3
  =========
4
4
 
5
+ 1.5.1 (2025-09-02)
6
+ ------------------
7
+
8
+ - IMPROVEMENT: add Google Translate node
9
+ - BUGFIX: improve error handling by ensuring we always have an Error object at hand
10
+ - UPDATE: upgrade NPM dependencies
11
+
12
+ 1.5.0 (2025-08-31)
13
+ ------------------
14
+
15
+ - IMPROVEMENT: add improved dashboard infrastructure and allow nodes to publish dashboard info
16
+ - IMPROVEMENT: add CLI option for exporting dashboard info via OSC
17
+ - IMPROVEMENT: add new audio processing nodes (compressor with sidechain, expander, gain, filler)
18
+ - IMPROVEMENT: add AWS integration nodes (Polly, Translate, Transcribe)
19
+ - IMPROVEMENT: add OpenAI Transcribe node for speech-to-text
20
+ - IMPROVEMENT: add noise suppression nodes (rnnoise, speex)
21
+ - IMPROVEMENT: provide audio helper utilities and access bus functionality
22
+ - IMPROVEMENT: improve types and error handling
23
+ - IMPROVEMENT: switch to GPT-5 with improved error handling and timeout support
24
+ - IMPROVEMENT: switch from native compressor to custom implementation
25
+ - BUGFIX: fix usage of AudioIO quit and abort methods
26
+ - BUGFIX: fix operator order in audio processing
27
+ - BUGFIX: reset envelope array when channels change
28
+ - BUGFIX: fix parameter configuration in audio nodes
29
+ - BUGFIX: fix private field access and remove unnecessary casts
30
+ - UPDATE: upgrade NPM dependencies
31
+ - UPDATE: update OxLint rules and configuration
32
+ - CLEANUP: cleanup and simplify code throughout project
33
+ - CLEANUP: cleanup expander node implementation and remove stereoLink feature
34
+ - CLEANUP: cleanup gender, ffmpeg, filler, and AWS nodes
35
+ - CLEANUP: reduce code depth in multiple components
36
+ - CLEANUP: align identifiers with remaining code
37
+ - CLEANUP: make code compliant with updated linter rules
38
+ - CLEANUP: fix indentation and remove duplicate entries
39
+
5
40
  1.4.5 (2025-08-07)
6
41
  ------------------
7
42
 
package/README.md CHANGED
@@ -31,10 +31,21 @@ remote MQTT network I/O,
31
31
  local Voice Activity Detection (VAD),
32
32
  local voice gender recognition,
33
33
  local audio LUFS-S/RMS metering,
34
+ local audio Speex noise suppression,
35
+ local audio RNNoise noise suppression,
36
+ local audio compressor processing,
37
+ local audio expander processing,
38
+ local audio gain processing,
39
+ local audio filler processing,
34
40
  remote-controllable local audio muting,
41
+ cloud-based [Amazon Transcribe](https://aws.amazon.com/transcribe/) speech-to-text conversion,
42
+ cloud-based [OpenAI GPT Transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe) speech-to-text conversion,
35
43
  cloud-based [Deepgram](https://deepgram.com) speech-to-text conversion,
36
44
  cloud-based [ElevenLabs](https://elevenlabs.io/) text-to-speech conversion,
45
+ cloud-based [Amazon Polly](https://aws.amazon.com/polly/) text-to-speech conversion,
37
46
  cloud-based [DeepL](https://deepl.com) text-to-text translation,
47
+ cloud-based [Amazon Translate](https://aws.amazon.com/translate/) text-to-text translation,
48
+ cloud-based [Google Cloud Translate](https://cloud.google.com/translate) text-to-text translation,
38
49
  cloud-based [OpenAI/GPT](https://openai.com) text-to-text translation (or spelling correction),
39
50
  local [Ollama/Gemma](https://ollama.com) text-to-text translation (or spelling correction),
40
51
  local [OPUS/ONNX](https://github.com/Helsinki-NLP/Opus-MT) text-to-text translation,
@@ -288,18 +299,30 @@ First a short overview of the available processing nodes:
288
299
  **mute**,
289
300
  **meter**,
290
301
  **vad**,
291
- **gender**.
302
+ **gender**,
303
+ **speex**,
304
+ **rnnoise**,
305
+ **compressor**,
306
+ **expander**,
307
+ **gain**,
308
+ **filler**.
292
309
  - Audio-to-Text nodes:
310
+ **openaitranscribe**,
311
+ **awstranscribe**,
293
312
  **deepgram**.
294
313
  - Text-to-Text nodes:
295
314
  **deepl**,
315
+ **awstranslate**,
296
316
  **openai**,
297
317
  **ollama**,
298
318
  **transformers**,
319
+ **google**,
299
320
  **subtitle**,
300
321
  **format**.
301
322
  - Text-to-Audio nodes:
323
+ **awspolly**,
302
324
  **elevenlabs**,
325
+ **kokoro**.
303
326
  - Any-to-Any nodes:
304
327
  **filter**,
305
328
  **trace**.
@@ -503,10 +526,160 @@ The following nodes process audio chunks only.
503
526
  | ----------- | --------- | -------- | ------------------------ |
504
527
  | **window** | 0 | 500 | *none* |
505
528
 
529
+ - Node: **speex**<br/>
530
+ Purpose: **Speex Noise Suppression node**<br/>
531
+ Example: `speex(attentuate: -18)`
532
+
533
+ > This node uses the Speex DSP pre-processor to perform noise
534
+ > suppression, i.e., it detects and attenuates (by a certain level of
535
+ > dB) the noise in the audio stream.
536
+
537
+ | Port | Payload |
538
+ | ------- | ----------- |
539
+ | input | audio |
540
+ | output | audio |
541
+
542
+ | Parameter | Position | Default | Requirement |
543
+ | ----------- | --------- | -------- | ------------------------ |
544
+ | **attentuate** | 0 | -18 | `-60 <= n <= 0` |
545
+
546
+ - Node: **rnnoise**<br/>
547
+ Purpose: **RNNoise Noise Suppression node**<br/>
548
+ Example: `rnnoise()`
549
+
550
+ > This node uses RNNoise to perform noise suppression, i.e., it
551
+ > detects and attenuates the noise in the audio stream.
552
+
553
+ | Port | Payload |
554
+ | ------- | ----------- |
555
+ | input | audio |
556
+ | output | audio |
557
+
558
+ | Parameter | Position | Default | Requirement |
559
+ | ----------- | --------- | -------- | ------------------------ |
560
+
561
+ - Node: **compressor**<br/>
562
+ Purpose: **audio compressor node**<br/>
563
+ Example: `compressor(thresholdDb: -18)`
564
+
565
+ > This node applies a dynamics compressor, i.e., it attenuates the
566
+ > volume by a certain ratio whenever the volume is above the threshold.
567
+
568
+ | Port | Payload |
569
+ | ------- | ----------- |
570
+ | input | audio |
571
+ | output | audio |
572
+
573
+ | Parameter | Position | Default | Requirement |
574
+ | ----------- | --------- | -------- | ------------------------ |
575
+ | **thresholdDb** | *none* | -18 | `n <= 0 && n >= -60` |
576
+ | **ratio** | *none* | 4 | `n >= 1 && n <= 20` |
577
+ | **attackMs** | *none* | 10 | `n >= 0 && n <= 100` |
578
+ | **releaseMs** | *none* | 50 | `n >= 0 && n <= 100` |
579
+ | **kneeDb** | *none* | 6 | `n >= 0 && n <= 100` |
580
+ | **makeupDb** | *none* | 0 | `n >= 0 && n <= 100` |
581
+
582
+ - Node: **expander**<br/>
583
+ Purpose: **audio expander node**<br/>
584
+ Example: `expander(thresholdDb: -46)`
585
+
586
+ > This node applies a dynamics expander, i.e., it attenuates the
587
+ > volume by a certain ratio whenever the volume is below the threshold.
588
+
589
+ | Port | Payload |
590
+ | ------- | ----------- |
591
+ | input | audio |
592
+ | output | audio |
593
+
594
+ | Parameter | Position | Default | Requirement |
595
+ | ----------- | --------- | -------- | ------------------------ |
596
+ | **thresholdDb** | *none* | -45 | `n <= 0 && n >= -60` |
597
+ | **ratio** | *none* | 4 | `n >= 1 && n <= 20` |
598
+ | **attackMs** | *none* | 10 | `n >= 0 && n <= 100` |
599
+ | **releaseMs** | *none* | 50 | `n >= 0 && n <= 100` |
600
+ | **kneeDb** | *none* | 6 | `n >= 0 && n <= 100` |
601
+ | **makeupDb** | *none* | 0 | `n >= 0 && n <= 100` |
602
+
603
+ - Node: **gain**<br/>
604
+ Purpose: **audio gain adjustment node**<br/>
605
+ Example: `gain(db: 12)`
606
+
607
+ > This node applies a gain adjustment to audio, i.e., it increases or
608
+ > decreases the volume by certain decibels.
609
+
610
+ | Port | Payload |
611
+ | ------- | ----------- |
612
+ | input | audio |
613
+ | output | audio |
614
+
615
+ | Parameter | Position | Default | Requirement |
616
+ | ----------- | --------- | -------- | ------------------------ |
617
+ | **db** | *none* | 12 | `n >= -60 && n <= 60` |
618
+
619
+ - Node: **filler**<br/>
620
+ Purpose: **audio filler node**<br/>
621
+ Example: `filler()`
622
+
623
+ > This node adds missing audio frames of silence in order to fill
624
+ > the chronological gaps between generated audio frames (from
625
+ > text-to-speech).
626
+
627
+ | Port | Payload |
628
+ | ------- | ----------- |
629
+ | input | audio |
630
+ | output | audio |
631
+
632
+ | Parameter | Position | Default | Requirement |
633
+ | ----------- | --------- | -------- | ------------------------ |
634
+
506
635
  ### Audio-to-Text Nodes
507
636
 
508
637
  The following nodes convert audio to text chunks.
509
638
 
639
+ - Node: **openaitranscribe**<br/>
640
+ Purpose: **OpenAI/GPT Speech-to-Text conversion**<br/>
641
+ Example: `openaitranscribe(language: "de")`<br/>
642
+ Notice: this node requires an OpenAI API key!
643
+
644
+ > This node uses OpenAI GPT to perform Speech-to-Text (S2T)
645
+ > conversion, i.e., it recognizes speech in the input audio stream and
646
+ > outputs a corresponding text stream.
647
+
648
+ | Port | Payload |
649
+ | ------- | ----------- |
650
+ | input | audio |
651
+ | output | text |
652
+
653
+ | Parameter | Position | Default | Requirement |
654
+ | ------------ | --------- | -------- | ------------------ |
655
+ | **key** | *none* | env.SPEECHFLOW\_OPENAI\_KEY | *none* |
656
+ | **api** | *none* | "https://api.openai.com" | `/^https?:\/\/.+?:\d+$/` |
657
+ | **model** | *none* | "gpt-4o-mini-transcribe" | *none* |
658
+ | **language** | *none* | "en" | `/^(?:de\|en)$/` |
659
+ | **interim** | *none* | false | *none* |
660
+
661
+ - Node: **awstranscribe**<br/>
662
+ Purpose: **Amazon Transcribe Speech-to-Text conversion**<br/>
663
+ Example: `awstranscribe(language: "de")`<br/>
664
+ Notice: this node requires an API key!
665
+
666
+ > This node uses Amazon Transcribe to perform Speech-to-Text (S2T)
667
+ > conversion, i.e., it recognizes speech in the input audio stream and
668
+ > outputs a corresponding text stream.
669
+
670
+ | Port | Payload |
671
+ | ------- | ----------- |
672
+ | input | audio |
673
+ | output | text |
674
+
675
+ | Parameter | Position | Default | Requirement |
676
+ | ------------ | --------- | -------- | ------------------ |
677
+ | **key** | *none* | env.SPEECHFLOW\_AMAZON\_KEY | *none* |
678
+ | **secKey** | *none* | env.SPEECHFLOW\_AMAZON\_KEY\_SEC | *none* |
679
+ | **region** | *none* | "eu-central-1" | *none* |
680
+ | **language** | *none* | "en" | `/^(?:en\|de)$/` |
681
+ | **interim** | *none* | false | *none* |
682
+
510
683
  - Node: **deepgram**<br/>
511
684
  Purpose: **Deepgram Speech-to-Text conversion**<br/>
512
685
  Example: `deepgram(language: "de")`<br/>
@@ -551,6 +724,26 @@ The following nodes process text chunks only.
551
724
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
552
725
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
553
726
 
727
+ - Node: **awstranslate**<br/>
728
+ Purpose: **AWS Translate Text-to-Text translation**<br/>
729
+ Example: `awstranslate(src: "de", dst: "en")`<br/>
730
+ Notice: this node requires an API key!
731
+
732
+ > This node performs translation between English and German languages.
733
+
734
+ | Port | Payload |
735
+ | ------- | ----------- |
736
+ | input | text |
737
+ | output | text |
738
+
739
+ | Parameter | Position | Default | Requirement |
740
+ | ------------ | --------- | -------- | ------------------ |
741
+ | **key** | *none* | env.SPEECHFLOW\_AMAZON\_KEY | *none* |
742
+ | **secKey** | *none* | env.SPEECHFLOW\_AMAZON\_KEY\_SEC | *none* |
743
+ | **region** | *none* | "eu-central-1" | *none* |
744
+ | **src** | 0 | "de" | `/^(?:de\|en)$/` |
745
+ | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
746
+
554
747
  - Node: **openai**<br/>
555
748
  Purpose: **OpenAI/GPT Text-to-Text translation and spelling correction**<br/>
556
749
  Example: `openai(src: "de", dst: "en")`<br/>
@@ -616,6 +809,26 @@ The following nodes process text chunks only.
616
809
  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
617
810
  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
618
811
 
812
+ - Node: **google**<br/>
813
+ Purpose: **Google Cloud Translate Text-to-Text translation**<br/>
814
+ Example: `google(src: "de", dst: "en")`<br/>
815
+ Notice: this node requires a Google Cloud API key and project ID!
816
+
817
+ > This node performs translation between multiple languages
818
+ > in the text stream using Google Cloud Translate API.
819
+ > It supports German, English, French, and Italian languages.
820
+
821
+ | Port | Payload |
822
+ | ------- | ----------- |
823
+ | input | text |
824
+ | output | text |
825
+
826
+ | Parameter | Position | Default | Requirement |
827
+ | ------------ | --------- | -------- | ------------------ |
828
+ | **key** | *none* | env.SPEECHFLOW\_GOOGLE\_KEY | *none* |
829
+ | **src** | 0 | "de" | `/^(?:de\|en\|fr\|it)$/` |
830
+ | **dst** | 1 | "en" | `/^(?:de\|en\|fr\|it)$/` |
831
+
619
832
  - Node: **sentence**<br/>
620
833
  Purpose: **sentence splitting/merging**<br/>
621
834
  Example: `sentence()`<br/>
@@ -671,14 +884,36 @@ The following nodes process text chunks only.
671
884
 
672
885
  The following nodes convert text chunks to audio chunks.
673
886
 
887
+ - Node: **awspolly**<br/>
888
+ Purpose: **Amazon Polly Text-to-Speech conversion**<br/>
889
+ Example: `awspolly(language: "en", voice: "Danielle")`<br/>
890
+ Notice: this node requires an Amazon API key!
891
+
892
+ > This node uses Amazon Polly to perform Text-to-Speech (T2S)
893
+ > conversion, i.e., it converts the input text stream into an output
894
+ > audio stream. It is intended to generate speech.
895
+
896
+ | Port | Payload |
897
+ | ------- | ----------- |
898
+ | input | text |
899
+ | output | audio |
900
+
901
+ | Parameter | Position | Default | Requirement |
902
+ | -------------- | --------- | --------- | ------------------ |
903
+ | **key** | *none* | env.SPEECHFLOW\_AMAZON\_KEY | *none* |
904
+ | **secKey** | *none* | env.SPEECHFLOW\_AMAZON\_KEY\_SEC | *none* |
905
+ | **region** | *none* | "eu-central-1" | *none* |
906
+ | **voice** | 0 | "Amy" | `/^(?:Amy\|Danielle\|Joanna\|Matthew\|Ruth\|Stephen\|Viki\|Daniel)$/` |
907
+ | **language** | 1 | "en" | `/^(?:de\|en)$/` |
908
+
674
909
  - Node: **elevenlabs**<br/>
675
910
  Purpose: **ElevenLabs Text-to-Speech conversion**<br/>
676
911
  Example: `elevenlabs(language: "en")`<br/>
677
912
  Notice: this node requires an ElevenLabs API key!
678
913
 
679
- > This node perform Text-to-Speech (T2S) conversion, i.e., it converts
680
- > the input text stream into an output audio stream. It is intended to
681
- > generate speech.
914
+ > This node uses ElevenLabs to perform Text-to-Speech (T2S)
915
+ > conversion, i.e., it converts the input text stream into an output
916
+ > audio stream. It is intended to generate speech.
682
917
 
683
918
  | Port | Payload |
684
919
  | ------- | ----------- |
@@ -700,9 +935,9 @@ The following nodes convert text chunks to audio chunks.
700
935
  Example: `kokoro(language: "en")`<br/>
701
936
  Notice: this node currently supports the English language only!
702
937
 
703
- > This node perform Text-to-Speech (T2S) conversion, i.e., it converts
704
- > the input text stream into an output audio stream. It is intended to
705
- > generate speech.
938
+ > This node uses Kokoro to perform Text-to-Speech (T2S) conversion,
939
+ > i.e., it converts the input text stream into an output audio stream.
940
+ > It is intended to generate speech.
706
941
 
707
942
  | Port | Payload |
708
943
  | ------- | ----------- |
package/etc/claude.md ADDED
@@ -0,0 +1,70 @@
1
+
2
+ # CLAUDE.md
3
+
4
+ This file provides guidance to Claude Code (claude.ai/code) when working
5
+ with code in this repository.
6
+
7
+ ## Project Overview
8
+
9
+ SpeechFlow is a command-line interface tool for establishing directed
10
+ data flow graphs of audio and text processing nodes. It enables flexible
11
+ speech processing tasks including capturing audio, text-to-speech,
12
+ speech-to-text, and speech-to-speech translation.
13
+
14
+ ## Architecture
15
+
16
+ SpeechFlow uses a modular node-based architecture:
17
+
18
+ - **Core Engine**: TypeScript-based CLI tool that orchestrates processing flows
19
+ - **Processing Nodes**: Modular components for different speech processing tasks (see `src/speechflow-node-*.ts`)
20
+ - **Flow Expression Language**: Based on FlowLink for defining processing graphs
21
+ - **Web Interfaces**: Two Vue.js applications for dashboard and subtitle display
22
+ - **REST/WebSocket API**: External control interface for nodes
23
+
24
+ ### Key Components
25
+
26
+ - **Main CLI**:
27
+ `src/speechflow.ts` - Entry point and CLI parsing
28
+ - **Nodes**:
29
+ - Input/Output: `file`, `device`, `websocket`, `mqtt`
30
+ - Audio-to-Audio: `ffmpeg`, `wav`, `mute`, `meter`, `vad`, `gender`
31
+ - Audio-to-Text: `deepgram`
32
+ - Text-to-Text: `deepl`, `openai`, `ollama`, `transformers`, `subtitle`, `format`, `sentence`
33
+ - Text-to-Audio: `elevenlabs`, `kokoro`
34
+ - Any-to-Any: `filter`, `trace`
35
+
36
+ ## Development Commands
37
+
38
+ The project uses STX (Simple Task eXecutor) for build automation. Main commands:
39
+
40
+ ### Core Project
41
+
42
+ ```bash
43
+ npm start lint # Static code analysis (TypeScript, ESLint, Biome, Oxlint)
44
+ npm start build # Compile TypeScript to JavaScript in dst/
45
+ npm start dev # Multi-pane development dashboard with linting, building, and server
46
+ npm start server # Run the main speechflow program
47
+ npm start clean # Remove generated files
48
+ ```
49
+
50
+ ## Project Structure
51
+
52
+ - `src/` - Main TypeScript source files
53
+ - `dst/` - Compiled JavaScript output
54
+ - `etc/` - Configuration files (TypeScript, ESLint, Biome, etc.)
55
+ - `package.d/` - NPM package patches
56
+
57
+ ## Development Notes
58
+
59
+ - Node.js 22+ required
60
+ - Uses object-mode streaming with timestamps for audio/text processing
61
+ - External services integration: Deepgram, ElevenLabs, DeepL, OpenAI, Ollama
62
+ - Supports local processing: FFmpeg, WAV, Voice Activity Detection, Gender Detection
63
+ - REST/WebSocket API on port 8484 for external control
64
+
65
+ ## Configuration
66
+
67
+ Main configuration in `etc/speechflow.yaml` with example
68
+ processing graphs. Environment variables used for API keys (e.g.,
69
+ `SPEECHFLOW_DEEPGRAM_KEY`, `SPEECHFLOW_ELEVENLABS_KEY`).
70
+
@@ -23,7 +23,7 @@ pass-through: |
23
23
  transcription: |
24
24
  file(path: argv.0, mode: "r", type: "audio") |
25
25
  ffmpeg(src: "mp3", dst: "pcm") |
26
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
26
+ deepgram(language: "de") |
27
27
  format(width: 80) |
28
28
  file(path: argv.1, mode: "w", type: "text")
29
29
 
@@ -31,7 +31,7 @@ transcription: |
31
31
  captioning: |
32
32
  file(path: argv.0, mode: "r", type: "audio") |
33
33
  ffmpeg(src: "mp3", dst: "pcm") |
34
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
34
+ deepgram(language: "de") |
35
35
  subtitle(format: "vtt") |
36
36
  file(path: argv.1, mode: "w", type: "text")
37
37
 
@@ -39,7 +39,7 @@ captioning: |
39
39
  subtitling: |
40
40
  file(path: argv.0, mode: "r", type: "audio") |
41
41
  ffmpeg(src: "mp3", dst: "pcm") |
42
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) |
42
+ deepgram(language: "de") |
43
43
  deepl(src: "de", dst: "en") |
44
44
  subtitle(format: "vtt") |
45
45
  file(path: argv.1, mode: "w", type: "text")
@@ -62,14 +62,16 @@ speaking: |
62
62
  studio-transcription: |
63
63
  file(path: argv.0, mode: "r", type: "audio") | {
64
64
  ffmpeg(src: "mp3", dst: "pcm") | {
65
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY) | {
65
+ deepgram(language: "de") | {
66
66
  format(width: 80) |
67
67
  file(path: argv.1, mode: "w", type: "text"),
68
68
  subtitle(format: "vtt") |
69
69
  file(path: argv.2, mode: "w", type: "text"),
70
70
  subtitle(format: "srt") |
71
- file(path: argv.3, mode: "w", type: "text")
72
- elevenlabs(voice: "Mark", optimize: "quality", speed: 1.05, language: "en")
71
+ file(path: argv.3, mode: "w", type: "text"),
72
+ elevenlabs(voice: "Mark", optimize: "quality", speed: 1.05, language: "en") |
73
+ wav(mode: "encode") |
74
+ file(path: argv.4, mode: "w", type: "audio")
73
75
  }
74
76
  }
75
77
  }
@@ -82,7 +84,7 @@ studio-translation: |
82
84
  meter(interval: 250, dashboard: "meter1") |
83
85
  wav(mode: "encode") |
84
86
  file(path: "program-de.wav", mode: "w", type: "audio"),
85
- deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY, interim: true) | {
87
+ deepgram(language: "de", interim: true) | {
86
88
  trace(name: "trace1", type: "text", dashboard: "text1") |
87
89
  subtitle(format: "vtt", words: true) |
88
90
  file(path: "program-de.vtt", mode: "w", type: "text"),
@@ -90,7 +92,7 @@ studio-translation: |
90
92
  trace(name: "trace2", type: "text", notify: true, dashboard: "text2") |
91
93
  format(width: 80) |
92
94
  file(path: "program-de.txt", mode: "w", type: "text"),
93
- deepl(src: "de", dst: "en", key: env.SPEECHFLOW_DEEPL_KEY) | {
95
+ deepl(src: "de", dst: "en") | {
94
96
  trace(name: "trace3", type: "text", dashboard: "text3") | {
95
97
  format(width: 80) |
96
98
  file(path: "program-en.txt", mode: "w", type: "text"),
@@ -102,7 +104,7 @@ studio-translation: |
102
104
  filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
103
105
  elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
104
106
  } | {
105
- meter(interval: 250, dashboard: "meter2", dashboard: "meter2"),
107
+ meter(interval: 250, dashboard: "meter2"),
106
108
  wav(mode: "encode") |
107
109
  file(path: "program-en.wav", mode: "w", type: "audio"),
108
110
  device(device: "coreaudio:USBAudio2.0", mode: "w")
@@ -118,13 +120,13 @@ studio-translation: |
118
120
  test: |
119
121
  device(device: "coreaudio:Elgato Wave:3", mode: "r") |
120
122
  meter(interval: 50, dashboard: "meter1") |
121
- deepgram(language: "de", model: "nova-2", key: env.SPEECHFLOW_DEEPGRAM_KEY, interim: true) |
123
+ deepgram(language: "de", model: "nova-2", interim: true) |
122
124
  trace(type: "text", dashboard: "text1") | {
123
125
  subtitle(mode: "render", addr: "127.0.0.1", port: 8585),
124
126
  filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
125
127
  sentence() |
126
128
  trace(type: "text", dashboard: "text2") |
127
- deepl(src: "de", dst: "en", key: env.SPEECHFLOW_DEEPL_KEY) |
129
+ deepl(src: "de", dst: "en") |
128
130
  trace(type: "text", dashboard: "text3") |
129
131
  elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
130
132
  meter(interval: 50, dashboard: "meter2") |
package/etc/stx.conf CHANGED
@@ -17,6 +17,13 @@ upd
17
17
  (cd speechflow-ui-db && npx -y upd) && \
18
18
  (cd speechflow-ui-st && npx -y upd)
19
19
 
20
+ # [top-level] provide statistics about code base
21
+ cloc
22
+ cloc etc \
23
+ speechflow-cli/etc speechflow-cli/src \
24
+ speechflow-ui-db/etc speechflow-ui-db/src \
25
+ speechflow-ui-st/etc speechflow-ui-st/src
26
+
20
27
  # [top-level] lint components for development
21
28
  lint
22
29
  npm --prefix speechflow-cli start lint && \
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "speechflow",
3
- "version": "1.4.5",
4
- "x-stdver": "1.4.5-GA",
5
- "x-release": "2025-08-07",
3
+ "version": "1.5.1",
4
+ "x-stdver": "1.5.1-GA",
5
+ "x-release": "2025-09-02",
6
6
  "homepage": "https://github.com/rse/speechflow",
7
7
  "description": "Speech Processing Flow Graph",
8
+ "keywords": [ "speech", "audio", "flow", "graph" ],
8
9
  "license": "GPL-3.0-only",
9
10
  "author": {
10
11
  "name": "Dr. Ralf S. Engelschall",
@@ -16,17 +17,17 @@
16
17
  "url": "git+https://github.com/rse/speechflow.git"
17
18
  },
18
19
  "dependencies": {
19
- "@rse/stx": "1.0.7"
20
+ "@rse/stx": "1.0.9"
20
21
  },
21
22
  "devDependencies": {
22
23
  "nodemon": "3.1.10",
23
24
  "watch": "1.0.2",
24
- "concurrently": "9.2.0",
25
+ "concurrently": "9.2.1",
25
26
  "wait-on": "8.0.4",
26
27
  "cross-env": "10.0.0",
27
28
  "shx": "0.4.0"
28
29
  },
29
- "engines" : {
30
+ "engines": {
30
31
  "npm": ">=10.0.0",
31
32
  "node": ">=22.0.0"
32
33
  },