speechflow 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +220 -7
- package/etc/claude.md +70 -0
- package/etc/speechflow.yaml +5 -3
- package/etc/stx.conf +7 -0
- package/package.json +7 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +12 -8
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +15 -13
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
- package/speechflow-cli/dst/speechflow-node.js +13 -2
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-utils.js +123 -35
- package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js +69 -14
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +112 -11
- package/speechflow-cli/etc/stx.conf +2 -2
- package/speechflow-cli/etc/tsconfig.json +1 -1
- package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
- package/speechflow-cli/package.json +102 -94
- package/speechflow-cli/src/lib.d.ts +24 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +13 -9
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +15 -13
- package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
- package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
- package/speechflow-cli/src/speechflow-node.ts +21 -8
- package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
- package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
- package/speechflow-cli/src/speechflow-utils.ts +125 -32
- package/speechflow-cli/src/speechflow.ts +74 -17
- package/speechflow-ui-db/dst/index.js +31 -31
- package/speechflow-ui-db/etc/eslint.mjs +0 -1
- package/speechflow-ui-db/etc/tsc-client.json +3 -3
- package/speechflow-ui-db/package.json +11 -10
- package/speechflow-ui-db/src/app.vue +20 -6
- package/speechflow-ui-st/dst/index.js +26 -26
- package/speechflow-ui-st/etc/eslint.mjs +0 -1
- package/speechflow-ui-st/etc/tsc-client.json +3 -3
- package/speechflow-ui-st/package.json +11 -10
- package/speechflow-ui-st/src/app.vue +5 -12
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,34 @@
|
|
|
2
2
|
ChangeLog
|
|
3
3
|
=========
|
|
4
4
|
|
|
5
|
+
1.5.0 (2025-08-31)
|
|
6
|
+
------------------
|
|
7
|
+
|
|
8
|
+
- IMPROVEMENT: add improved dashboard infrastructure and allow nodes to publish dashboard info
|
|
9
|
+
- IMPROVEMENT: add CLI option for exporting dashboard info via OSC
|
|
10
|
+
- IMPROVEMENT: add new audio processing nodes (compressor with sidechain, expander, gain, filler)
|
|
11
|
+
- IMPROVEMENT: add AWS integration nodes (Polly, Translate, Transcribe)
|
|
12
|
+
- IMPROVEMENT: add OpenAI Transcribe node for speech-to-text
|
|
13
|
+
- IMPROVEMENT: add noise suppression nodes (rnnoise, speex)
|
|
14
|
+
- IMPROVEMENT: provide audio helper utilities and access bus functionality
|
|
15
|
+
- IMPROVEMENT: improve types and error handling
|
|
16
|
+
- IMPROVEMENT: switch to GPT-5 with improved error handling and timeout support
|
|
17
|
+
- IMPROVEMENT: switch from native compressor to custom implementation
|
|
18
|
+
- BUGFIX: fix usage of AudioIO quit and abort methods
|
|
19
|
+
- BUGFIX: fix operator order in audio processing
|
|
20
|
+
- BUGFIX: reset envelope array when channels change
|
|
21
|
+
- BUGFIX: fix parameter configuration in audio nodes
|
|
22
|
+
- BUGFIX: fix private field access and remove unnecessary casts
|
|
23
|
+
- UPDATE: upgrade NPM dependencies
|
|
24
|
+
- UPDATE: update OxLint rules and configuration
|
|
25
|
+
- CLEANUP: cleanup and simplify code throughout project
|
|
26
|
+
- CLEANUP: cleanup expander node implementation and remove stereoLink feature
|
|
27
|
+
- CLEANUP: cleanup gender, ffmpeg, filler, and AWS nodes
|
|
28
|
+
- CLEANUP: reduce code depth in multiple components
|
|
29
|
+
- CLEANUP: align identifiers with remaining code
|
|
30
|
+
- CLEANUP: make code compliant with updated linter rules
|
|
31
|
+
- CLEANUP: fix indentation and remove duplicate entries
|
|
32
|
+
|
|
5
33
|
1.4.5 (2025-08-07)
|
|
6
34
|
------------------
|
|
7
35
|
|
package/README.md
CHANGED
|
@@ -31,10 +31,20 @@ remote MQTT network I/O,
|
|
|
31
31
|
local Voice Activity Detection (VAD),
|
|
32
32
|
local voice gender recognition,
|
|
33
33
|
local audio LUFS-S/RMS metering,
|
|
34
|
+
local audio Speex noise suppression,
|
|
35
|
+
local audio RNNoise noise suppression,
|
|
36
|
+
local audio compressor processing,
|
|
37
|
+
local audio expander processing,
|
|
38
|
+
local audio gain processing,
|
|
39
|
+
local audio filler processing,
|
|
34
40
|
remote-controllable local audio muting,
|
|
41
|
+
cloud-based [Amazon Transcribe](https://aws.amazon.com/transcribe/) speech-to-text conversion,
|
|
42
|
+
cloud-based [OpenAI GPT Transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe) speech-to-text conversion,
|
|
35
43
|
cloud-based [Deepgram](https://deepgram.com) speech-to-text conversion,
|
|
36
44
|
cloud-based [ElevenLabs](https://elevenlabs.io/) text-to-speech conversion,
|
|
45
|
+
cloud-based [Amazon Polly](https://aws.amazon.com/polly/) text-to-speech conversion,
|
|
37
46
|
cloud-based [DeepL](https://deepl.com) text-to-text translation,
|
|
47
|
+
cloud-based [Amazon Translate](https://aws.amazon.com/translate/) text-to-text translation,
|
|
38
48
|
cloud-based [OpenAI/GPT](https://openai.com) text-to-text translation (or spelling correction),
|
|
39
49
|
local [Ollama/Gemma](https://ollama.com) text-to-text translation (or spelling correction),
|
|
40
50
|
local [OPUS/ONNX](https://github.com/Helsinki-NLP/Opus-MT) text-to-text translation,
|
|
@@ -288,18 +298,29 @@ First a short overview of the available processing nodes:
|
|
|
288
298
|
**mute**,
|
|
289
299
|
**meter**,
|
|
290
300
|
**vad**,
|
|
291
|
-
**gender
|
|
301
|
+
**gender**,
|
|
302
|
+
**speex**,
|
|
303
|
+
**rnnoise**,
|
|
304
|
+
**compressor**,
|
|
305
|
+
**expander**,
|
|
306
|
+
**gain**,
|
|
307
|
+
**filler**.
|
|
292
308
|
- Audio-to-Text nodes:
|
|
309
|
+
**openaitranscribe**,
|
|
310
|
+
**awstranscribe**,
|
|
293
311
|
**deepgram**.
|
|
294
312
|
- Text-to-Text nodes:
|
|
295
313
|
**deepl**,
|
|
314
|
+
**awstranslate**,
|
|
296
315
|
**openai**,
|
|
297
316
|
**ollama**,
|
|
298
317
|
**transformers**,
|
|
299
318
|
**subtitle**,
|
|
300
319
|
**format**.
|
|
301
320
|
- Text-to-Audio nodes:
|
|
321
|
+
**awspolly**,
|
|
302
322
|
**elevenlabs**,
|
|
323
|
+
**kokoro**.
|
|
303
324
|
- Any-to-Any nodes:
|
|
304
325
|
**filter**,
|
|
305
326
|
**trace**.
|
|
@@ -503,10 +524,160 @@ The following nodes process audio chunks only.
|
|
|
503
524
|
| ----------- | --------- | -------- | ------------------------ |
|
|
504
525
|
| **window** | 0 | 500 | *none* |
|
|
505
526
|
|
|
527
|
+
- Node: **speex**<br/>
|
|
528
|
+
Purpose: **Speex Noise Suppression node**<br/>
|
|
529
|
+
Example: `speex(attentuate: -18)`
|
|
530
|
+
|
|
531
|
+
> This node uses the Speex DSP pre-processor to perform noise
|
|
532
|
+
> suppression, i.e., it detects and attenuates (by a certain level of
|
|
533
|
+
> dB) the noise in the audio stream.
|
|
534
|
+
|
|
535
|
+
| Port | Payload |
|
|
536
|
+
| ------- | ----------- |
|
|
537
|
+
| input | audio |
|
|
538
|
+
| output | audio |
|
|
539
|
+
|
|
540
|
+
| Parameter | Position | Default | Requirement |
|
|
541
|
+
| ----------- | --------- | -------- | ------------------------ |
|
|
542
|
+
| **attentuate** | 0 | -18 | `-60 <= n <= 0` |
|
|
543
|
+
|
|
544
|
+
- Node: **rnnoise**<br/>
|
|
545
|
+
Purpose: **RNNoise Noise Suppression node**<br/>
|
|
546
|
+
Example: `rnnoise()`
|
|
547
|
+
|
|
548
|
+
> This node uses RNNoise to perform noise suppression, i.e., it
|
|
549
|
+
> detects and attenuates the noise in the audio stream.
|
|
550
|
+
|
|
551
|
+
| Port | Payload |
|
|
552
|
+
| ------- | ----------- |
|
|
553
|
+
| input | audio |
|
|
554
|
+
| output | audio |
|
|
555
|
+
|
|
556
|
+
| Parameter | Position | Default | Requirement |
|
|
557
|
+
| ----------- | --------- | -------- | ------------------------ |
|
|
558
|
+
|
|
559
|
+
- Node: **compressor**<br/>
|
|
560
|
+
Purpose: **audio compressor node**<br/>
|
|
561
|
+
Example: `compressor(thresholdDb: -18)`
|
|
562
|
+
|
|
563
|
+
> This node applies a dynamics compressor, i.e., it attenuates the
|
|
564
|
+
> volume by a certain ratio whenever the volume is above the threshold.
|
|
565
|
+
|
|
566
|
+
| Port | Payload |
|
|
567
|
+
| ------- | ----------- |
|
|
568
|
+
| input | audio |
|
|
569
|
+
| output | audio |
|
|
570
|
+
|
|
571
|
+
| Parameter | Position | Default | Requirement |
|
|
572
|
+
| ----------- | --------- | -------- | ------------------------ |
|
|
573
|
+
| **thresholdDb** | *none* | -18 | `n <= 0 && n >= -60` |
|
|
574
|
+
| **ratio** | *none* | 4 | `n >= 1 && n <= 20` |
|
|
575
|
+
| **attackMs** | *none* | 10 | `n >= 0 && n <= 100` |
|
|
576
|
+
| **releaseMs** | *none* | 50 | `n >= 0 && n <= 100` |
|
|
577
|
+
| **kneeDb** | *none* | 6 | `n >= 0 && n <= 100` |
|
|
578
|
+
| **makeupDb** | *none* | 0 | `n >= 0 && n <= 100` |
|
|
579
|
+
|
|
580
|
+
- Node: **expander**<br/>
|
|
581
|
+
Purpose: **audio expander node**<br/>
|
|
582
|
+
Example: `expander(thresholdDb: -46)`
|
|
583
|
+
|
|
584
|
+
> This node applies a dynamics expander, i.e., it attenuates the
|
|
585
|
+
> volume by a certain ratio whenever the volume is below the threshold.
|
|
586
|
+
|
|
587
|
+
| Port | Payload |
|
|
588
|
+
| ------- | ----------- |
|
|
589
|
+
| input | audio |
|
|
590
|
+
| output | audio |
|
|
591
|
+
|
|
592
|
+
| Parameter | Position | Default | Requirement |
|
|
593
|
+
| ----------- | --------- | -------- | ------------------------ |
|
|
594
|
+
| **thresholdDb** | *none* | -45 | `n <= 0 && n >= -60` |
|
|
595
|
+
| **ratio** | *none* | 4 | `n >= 1 && n <= 20` |
|
|
596
|
+
| **attackMs** | *none* | 10 | `n >= 0 && n <= 100` |
|
|
597
|
+
| **releaseMs** | *none* | 50 | `n >= 0 && n <= 100` |
|
|
598
|
+
| **kneeDb** | *none* | 6 | `n >= 0 && n <= 100` |
|
|
599
|
+
| **makeupDb** | *none* | 0 | `n >= 0 && n <= 100` |
|
|
600
|
+
|
|
601
|
+
- Node: **gain**<br/>
|
|
602
|
+
Purpose: **audio gain adjustment node**<br/>
|
|
603
|
+
Example: `gain(db: 12)`
|
|
604
|
+
|
|
605
|
+
> This node applies a gain adjustment to audio, i.e., it increases or
|
|
606
|
+
> decreases the volume by a certain number of decibels.
|
|
607
|
+
|
|
608
|
+
| Port | Payload |
|
|
609
|
+
| ------- | ----------- |
|
|
610
|
+
| input | audio |
|
|
611
|
+
| output | audio |
|
|
612
|
+
|
|
613
|
+
| Parameter | Position | Default | Requirement |
|
|
614
|
+
| ----------- | --------- | -------- | ------------------------ |
|
|
615
|
+
| **db** | *none* | 12 | `n >= -60 && n <= 60` |
|
|
616
|
+
|
|
617
|
+
- Node: **filler**<br/>
|
|
618
|
+
Purpose: **audio filler node**<br/>
|
|
619
|
+
Example: `filler()`
|
|
620
|
+
|
|
621
|
+
> This node adds missing audio frames of silence in order to fill
|
|
622
|
+
> the chronological gaps between generated audio frames (from
|
|
623
|
+
> text-to-speech).
|
|
624
|
+
|
|
625
|
+
| Port | Payload |
|
|
626
|
+
| ------- | ----------- |
|
|
627
|
+
| input | audio |
|
|
628
|
+
| output | audio |
|
|
629
|
+
|
|
630
|
+
| Parameter | Position | Default | Requirement |
|
|
631
|
+
| ----------- | --------- | -------- | ------------------------ |
|
|
632
|
+
|
|
506
633
|
### Audio-to-Text Nodes
|
|
507
634
|
|
|
508
635
|
The following nodes convert audio to text chunks.
|
|
509
636
|
|
|
637
|
+
- Node: **openaitranscribe**<br/>
|
|
638
|
+
Purpose: **OpenAI/GPT Speech-to-Text conversion**<br/>
|
|
639
|
+
Example: `openaitranscribe(language: "de")`<br/>
|
|
640
|
+
Notice: this node requires an OpenAI API key!
|
|
641
|
+
|
|
642
|
+
> This node uses OpenAI GPT to perform Speech-to-Text (S2T)
|
|
643
|
+
> conversion, i.e., it recognizes speech in the input audio stream and
|
|
644
|
+
> outputs a corresponding text stream.
|
|
645
|
+
|
|
646
|
+
| Port | Payload |
|
|
647
|
+
| ------- | ----------- |
|
|
648
|
+
| input | audio |
|
|
649
|
+
| output | text |
|
|
650
|
+
|
|
651
|
+
| Parameter | Position | Default | Requirement |
|
|
652
|
+
| ------------ | --------- | -------- | ------------------ |
|
|
653
|
+
| **key** | *none* | env.SPEECHFLOW\_OPENAI\_KEY | *none* |
|
|
654
|
+
| **api** | *none* | "https://api.openai.com" | `/^https?:\/\/.+?:\d+$/` |
|
|
655
|
+
| **model** | *none* | "gpt-4o-mini-transcribe" | *none* |
|
|
656
|
+
| **language** | *none* | "en" | `/^(?:de\|en)$/` |
|
|
657
|
+
| **interim** | *none* | false | *none* |
|
|
658
|
+
|
|
659
|
+
- Node: **awstranscribe**<br/>
|
|
660
|
+
Purpose: **Amazon Transcribe Speech-to-Text conversion**<br/>
|
|
661
|
+
Example: `awstranscribe(language: "de")`<br/>
|
|
662
|
+
Notice: this node requires an API key!
|
|
663
|
+
|
|
664
|
+
> This node uses Amazon Transcribe to perform Speech-to-Text (S2T)
|
|
665
|
+
> conversion, i.e., it recognizes speech in the input audio stream and
|
|
666
|
+
> outputs a corresponding text stream.
|
|
667
|
+
|
|
668
|
+
| Port | Payload |
|
|
669
|
+
| ------- | ----------- |
|
|
670
|
+
| input | audio |
|
|
671
|
+
| output | text |
|
|
672
|
+
|
|
673
|
+
| Parameter | Position | Default | Requirement |
|
|
674
|
+
| ------------ | --------- | -------- | ------------------ |
|
|
675
|
+
| **key** | *none* | env.SPEECHFLOW\_AMAZON\_KEY | *none* |
|
|
676
|
+
| **secKey** | *none* | env.SPEECHFLOW\_AMAZON\_KEY\_SEC | *none* |
|
|
677
|
+
| **region** | *none* | "eu-central-1" | *none* |
|
|
678
|
+
| **language** | *none* | "en" | `/^(?:en\|de)$/` |
|
|
679
|
+
| **interim** | *none* | false | *none* |
|
|
680
|
+
|
|
510
681
|
- Node: **deepgram**<br/>
|
|
511
682
|
Purpose: **Deepgram Speech-to-Text conversion**<br/>
|
|
512
683
|
Example: `deepgram(language: "de")`<br/>
|
|
@@ -551,6 +722,26 @@ The following nodes process text chunks only.
|
|
|
551
722
|
| **src** | 0 | "de" | `/^(?:de\|en)$/` |
|
|
552
723
|
| **dst** | 1 | "en" | `/^(?:de\|en)$/` |
|
|
553
724
|
|
|
725
|
+
- Node: **awstranslate**<br/>
|
|
726
|
+
Purpose: **AWS Translate Text-to-Text translation**<br/>
|
|
727
|
+
Example: `awstranslate(src: "de", dst: "en")`<br/>
|
|
728
|
+
Notice: this node requires an API key!
|
|
729
|
+
|
|
730
|
+
> This node performs translation between English and German languages.
|
|
731
|
+
|
|
732
|
+
| Port | Payload |
|
|
733
|
+
| ------- | ----------- |
|
|
734
|
+
| input | text |
|
|
735
|
+
| output | text |
|
|
736
|
+
|
|
737
|
+
| Parameter | Position | Default | Requirement |
|
|
738
|
+
| ------------ | --------- | -------- | ------------------ |
|
|
739
|
+
| **key** | *none* | env.SPEECHFLOW\_AMAZON\_KEY | *none* |
|
|
740
|
+
| **secKey** | *none* | env.SPEECHFLOW\_AMAZON\_KEY\_SEC | *none* |
|
|
741
|
+
| **region** | *none* | "eu-central-1" | *none* |
|
|
742
|
+
| **src** | 0 | "de" | `/^(?:de\|en)$/` |
|
|
743
|
+
| **dst** | 1 | "en" | `/^(?:de\|en)$/` |
|
|
744
|
+
|
|
554
745
|
- Node: **openai**<br/>
|
|
555
746
|
Purpose: **OpenAI/GPT Text-to-Text translation and spelling correction**<br/>
|
|
556
747
|
Example: `openai(src: "de", dst: "en")`<br/>
|
|
@@ -671,14 +862,36 @@ The following nodes process text chunks only.
|
|
|
671
862
|
|
|
672
863
|
The following nodes convert text chunks to audio chunks.
|
|
673
864
|
|
|
865
|
+
- Node: **awspolly**<br/>
|
|
866
|
+
Purpose: **Amazon Polly Text-to-Speech conversion**<br/>
|
|
867
|
+
Example: `awspolly(language: "en", voice: "Danielle")`<br/>
|
|
868
|
+
Notice: this node requires an Amazon API key!
|
|
869
|
+
|
|
870
|
+
> This node uses Amazon Polly to perform Text-to-Speech (T2S)
|
|
871
|
+
> conversion, i.e., it converts the input text stream into an output
|
|
872
|
+
> audio stream. It is intended to generate speech.
|
|
873
|
+
|
|
874
|
+
| Port | Payload |
|
|
875
|
+
| ------- | ----------- |
|
|
876
|
+
| input | text |
|
|
877
|
+
| output | audio |
|
|
878
|
+
|
|
879
|
+
| Parameter | Position | Default | Requirement |
|
|
880
|
+
| -------------- | --------- | --------- | ------------------ |
|
|
881
|
+
| **key** | *none* | env.SPEECHFLOW\_AMAZON\_KEY | *none* |
|
|
882
|
+
| **secKey** | *none* | env.SPEECHFLOW\_AMAZON\_KEY\_SEC | *none* |
|
|
883
|
+
| **region** | *none* | "eu-central-1" | *none* |
|
|
884
|
+
| **voice** | 0 | "Amy" | `/^(?:Amy\|Danielle\|Joanna\|Matthew\|Ruth\|Stephen\|Viki\|Daniel)$/` |
|
|
885
|
+
| **language** | 1 | "en" | `/^(?:de\|en)$/` |
|
|
886
|
+
|
|
674
887
|
- Node: **elevenlabs**<br/>
|
|
675
888
|
Purpose: **ElevenLabs Text-to-Speech conversion**<br/>
|
|
676
889
|
Example: `elevenlabs(language: "en")`<br/>
|
|
677
890
|
Notice: this node requires an ElevenLabs API key!
|
|
678
891
|
|
|
679
|
-
> This node perform Text-to-Speech (T2S)
|
|
680
|
-
> the input text stream into an output
|
|
681
|
-
> generate speech.
|
|
892
|
+
> This node uses ElevenLabs to perform Text-to-Speech (T2S)
|
|
893
|
+
> conversion, i.e., it converts the input text stream into an output
|
|
894
|
+
> audio stream. It is intended to generate speech.
|
|
682
895
|
|
|
683
896
|
| Port | Payload |
|
|
684
897
|
| ------- | ----------- |
|
|
@@ -700,9 +913,9 @@ The following nodes convert text chunks to audio chunks.
|
|
|
700
913
|
Example: `kokoro(language: "en")`<br/>
|
|
701
914
|
Notice: this node currently supports the English language only!
|
|
702
915
|
|
|
703
|
-
> This node perform Text-to-Speech (T2S) conversion,
|
|
704
|
-
> the input text stream into an output audio stream.
|
|
705
|
-
> generate speech.
|
|
916
|
+
> This node uses Kokoro to perform Text-to-Speech (T2S) conversion,
|
|
917
|
+
> i.e., it converts the input text stream into an output audio stream.
|
|
918
|
+
> It is intended to generate speech.
|
|
706
919
|
|
|
707
920
|
| Port | Payload |
|
|
708
921
|
| ------- | ----------- |
|
package/etc/claude.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
|
|
2
|
+
# CLAUDE.md
|
|
3
|
+
|
|
4
|
+
This file provides guidance to Claude Code (claude.ai/code) when working
|
|
5
|
+
with code in this repository.
|
|
6
|
+
|
|
7
|
+
## Project Overview
|
|
8
|
+
|
|
9
|
+
SpeechFlow is a command-line interface tool for establishing directed
|
|
10
|
+
data flow graphs of audio and text processing nodes. It enables flexible
|
|
11
|
+
speech processing tasks including capturing audio, text-to-speech,
|
|
12
|
+
speech-to-text, and speech-to-speech translation.
|
|
13
|
+
|
|
14
|
+
## Architecture
|
|
15
|
+
|
|
16
|
+
SpeechFlow uses a modular node-based architecture:
|
|
17
|
+
|
|
18
|
+
- **Core Engine**: TypeScript-based CLI tool that orchestrates processing flows
|
|
19
|
+
- **Processing Nodes**: Modular components for different speech processing tasks (see `src/speechflow-node-*.ts`)
|
|
20
|
+
- **Flow Expression Language**: Based on FlowLink for defining processing graphs
|
|
21
|
+
- **Web Interfaces**: Two Vue.js applications for dashboard and subtitle display
|
|
22
|
+
- **REST/WebSocket API**: External control interface for nodes
|
|
23
|
+
|
|
24
|
+
### Key Components
|
|
25
|
+
|
|
26
|
+
- **Main CLI**:
|
|
27
|
+
`src/speechflow.ts` - Entry point and CLI parsing
|
|
28
|
+
- **Nodes**:
|
|
29
|
+
- Input/Output: `file`, `device`, `websocket`, `mqtt`
|
|
30
|
+
- Audio-to-Audio: `ffmpeg`, `wav`, `mute`, `meter`, `vad`, `gender`
|
|
31
|
+
- Audio-to-Text: `deepgram`
|
|
32
|
+
- Text-to-Text: `deepl`, `openai`, `ollama`, `transformers`, `subtitle`, `format`, `sentence`
|
|
33
|
+
- Text-to-Audio: `elevenlabs`, `kokoro`
|
|
34
|
+
- Any-to-Any: `filter`, `trace`
|
|
35
|
+
|
|
36
|
+
## Development Commands
|
|
37
|
+
|
|
38
|
+
The project uses STX (Simple Task eXecutor) for build automation. Main commands:
|
|
39
|
+
|
|
40
|
+
### Core Project
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
npm start lint # Static code analysis (TypeScript, ESLint, Biome, Oxlint)
|
|
44
|
+
npm start build # Compile TypeScript to JavaScript in dst/
|
|
45
|
+
npm start dev # Multi-pane development dashboard with linting, building, and server
|
|
46
|
+
npm start server # Run the main speechflow program
|
|
47
|
+
npm start clean # Remove generated files
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Project Structure
|
|
51
|
+
|
|
52
|
+
- `src/` - Main TypeScript source files
|
|
53
|
+
- `dst/` - Compiled JavaScript output
|
|
54
|
+
- `etc/` - Configuration files (TypeScript, ESLint, Biome, etc.)
|
|
55
|
+
- `package.d/` - NPM package patches
|
|
56
|
+
|
|
57
|
+
## Development Notes
|
|
58
|
+
|
|
59
|
+
- Node.js 22+ required
|
|
60
|
+
- Uses object-mode streaming with timestamps for audio/text processing
|
|
61
|
+
- External services integration: Deepgram, ElevenLabs, DeepL, OpenAI, Ollama
|
|
62
|
+
- Supports local processing: FFmpeg, WAV, Voice Activity Detection, Gender Detection
|
|
63
|
+
- REST/WebSocket API on port 8484 for external control
|
|
64
|
+
|
|
65
|
+
## Configuration
|
|
66
|
+
|
|
67
|
+
Main configuration in `etc/speechflow.yaml` with example
|
|
68
|
+
processing graphs. Environment variables used for API keys (e.g.,
|
|
69
|
+
`SPEECHFLOW_DEEPGRAM_KEY`, `SPEECHFLOW_ELEVENLABS_KEY`).
|
|
70
|
+
|
package/etc/speechflow.yaml
CHANGED
|
@@ -68,8 +68,10 @@ studio-transcription: |
|
|
|
68
68
|
subtitle(format: "vtt") |
|
|
69
69
|
file(path: argv.2, mode: "w", type: "text"),
|
|
70
70
|
subtitle(format: "srt") |
|
|
71
|
-
file(path: argv.3, mode: "w", type: "text")
|
|
72
|
-
elevenlabs(voice: "Mark", optimize: "quality", speed: 1.05, language: "en")
|
|
71
|
+
file(path: argv.3, mode: "w", type: "text"),
|
|
72
|
+
elevenlabs(voice: "Mark", optimize: "quality", speed: 1.05, language: "en") |
|
|
73
|
+
wav(mode: "encode") |
|
|
74
|
+
file(path: argv.4, mode: "w", type: "audio")
|
|
73
75
|
}
|
|
74
76
|
}
|
|
75
77
|
}
|
|
@@ -102,7 +104,7 @@ studio-translation: |
|
|
|
102
104
|
filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
|
|
103
105
|
elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
|
|
104
106
|
} | {
|
|
105
|
-
meter(interval: 250, dashboard: "meter2"
|
|
107
|
+
meter(interval: 250, dashboard: "meter2"),
|
|
106
108
|
wav(mode: "encode") |
|
|
107
109
|
file(path: "program-en.wav", mode: "w", type: "audio"),
|
|
108
110
|
device(device: "coreaudio:USBAudio2.0", mode: "w")
|
package/etc/stx.conf
CHANGED
|
@@ -17,6 +17,13 @@ upd
|
|
|
17
17
|
(cd speechflow-ui-db && npx -y upd) && \
|
|
18
18
|
(cd speechflow-ui-st && npx -y upd)
|
|
19
19
|
|
|
20
|
+
# [top-level] provide statistics about code base
|
|
21
|
+
cloc
|
|
22
|
+
cloc etc \
|
|
23
|
+
speechflow-cli/etc speechflow-cli/src \
|
|
24
|
+
speechflow-ui-db/etc speechflow-ui-db/src \
|
|
25
|
+
speechflow-ui-st/etc speechflow-ui-st/src
|
|
26
|
+
|
|
20
27
|
# [top-level] lint components for development
|
|
21
28
|
lint
|
|
22
29
|
npm --prefix speechflow-cli start lint && \
|
package/package.json
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "speechflow",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"x-stdver": "1.
|
|
5
|
-
"x-release": "2025-08-
|
|
3
|
+
"version": "1.5.0",
|
|
4
|
+
"x-stdver": "1.5.0-GA",
|
|
5
|
+
"x-release": "2025-08-31",
|
|
6
6
|
"homepage": "https://github.com/rse/speechflow",
|
|
7
7
|
"description": "Speech Processing Flow Graph",
|
|
8
|
+
"keywords": [ "speech", "audio", "flow", "graph" ],
|
|
8
9
|
"license": "GPL-3.0-only",
|
|
9
10
|
"author": {
|
|
10
11
|
"name": "Dr. Ralf S. Engelschall",
|
|
@@ -16,17 +17,17 @@
|
|
|
16
17
|
"url": "git+https://github.com/rse/speechflow.git"
|
|
17
18
|
},
|
|
18
19
|
"dependencies": {
|
|
19
|
-
"@rse/stx": "1.0.
|
|
20
|
+
"@rse/stx": "1.0.9"
|
|
20
21
|
},
|
|
21
22
|
"devDependencies": {
|
|
22
23
|
"nodemon": "3.1.10",
|
|
23
24
|
"watch": "1.0.2",
|
|
24
|
-
"concurrently": "9.2.
|
|
25
|
+
"concurrently": "9.2.1",
|
|
25
26
|
"wait-on": "8.0.4",
|
|
26
27
|
"cross-env": "10.0.0",
|
|
27
28
|
"shx": "0.4.0"
|
|
28
29
|
},
|
|
29
|
-
"engines"
|
|
30
|
+
"engines": {
|
|
30
31
|
"npm": ">=10.0.0",
|
|
31
32
|
"node": ">=22.0.0"
|
|
32
33
|
},
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*
|
|
3
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
4
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
5
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
6
|
+
*/
|
|
7
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
|
+
const utils = __importStar(require("./speechflow-utils"));
|
|
42
|
+
/* downward compressor with soft knee */
|
|
43
|
+
/*  AudioWorklet processor implementing a downward compressor with soft knee.

    Per render block it (1) updates a smoothed RMS envelope for every input
    channel, (2) derives a gain change in dB from that envelope, the
    threshold, the ratio and the knee width, and (3) writes the input samples
    scaled by that gain (plus make-up gain) to the output.  */
class CompressorProcessor extends AudioWorkletProcessor {
    /* internal state */
    env = [];       /* per-channel RMS envelope as linear amplitude */
    sampleRate;     /* sample rate taken from the processor options */
    reduction = 0;  /* gain reduction (dB, always <= 0) of the first channel */

    /* eslint no-undef: off */
    static get parameterDescriptors() {
        return [
            { name: "threshold", defaultValue: -23, minValue: -100, maxValue: 0, automationRate: "k-rate" }, // dBFS
            { name: "ratio", defaultValue: 4.0, minValue: 1.0, maxValue: 20, automationRate: "k-rate" }, // compression ratio
            { name: "attack", defaultValue: 0.010, minValue: 0.0, maxValue: 1, automationRate: "k-rate" }, // seconds
            { name: "release", defaultValue: 0.050, minValue: 0.0, maxValue: 1, automationRate: "k-rate" }, // seconds
            { name: "knee", defaultValue: 6.0, minValue: 0.0, maxValue: 40, automationRate: "k-rate" }, // dB
            { name: "makeup", defaultValue: 0.0, minValue: -24, maxValue: 24, automationRate: "k-rate" } // dB
        ];
    }

    /* class constructor for custom option processing
       (expects "options.processorOptions.sampleRate" to be provided) */
    constructor(options) {
        super();
        const { sampleRate } = options.processorOptions;
        this.sampleRate = sampleRate;
    }

    /*  determine the gain change (in dB, always <= 0) for a given level

        levelDB:     detected signal level (dB)
        thresholdDB: level at which compression starts (dB)
        ratio:       compression ratio (values <= 1 disable compression)
        kneeDB:      knee width (dB); the soft knee spans the range
                     [thresholdDB, thresholdDB + kneeDB / 2)  */
    gainDBFor(levelDB, thresholdDB, ratio, kneeDB) {
        /* short-circuit for unreasonable ratio */
        if (ratio <= 1.0)
            return 0;

        /* short-circuit for no compression (below threshold) */
        const overDB = levelDB - thresholdDB;
        if (overDB < 0)
            return 0;

        /* gain change of the hard-knee characteristic */
        const idealGainDB = (thresholdDB + overDB / ratio) - levelDB;

        /* apply soft-knee: blend quadratically from 0 (at threshold) to the
           full hard-knee gain (at the upper end of the knee); the position is
           normalized by the half knee width, so the curve is continuous at the
           point where the hard-knee characteristic takes over */
        const halfKnee = kneeDB * 0.5;
        if (kneeDB > 0 && overDB < halfKnee) {
            const x = overDB / halfKnee;
            return idealGainDB * x * x;
        }

        /* return gain difference */
        return idealGainDB;
    }

    /*  update envelope (smoothed amplitude contour) for single channel

        chan:    channel index into "this.env"
        samples: samples of the channel for the current block
        attack:  attack time (seconds, has to be > 0)
        release: release time (seconds, has to be > 0)  */
    updateEnvelopeForChannel(chan, samples, attack, release) {
        /* lower bound of the detector in the power domain */
        const floor = 1e-12;

        /* fetch old envelope value: "this.env" holds a linear amplitude,
           while the smoothing below operates on the squared signal, so the
           stored value has to be squared again before it is reused */
        const previous = this.env[chan];
        let power = previous === undefined ? floor : Math.max(previous * previous, floor);

        /* calculate attack/release alpha values */
        const alphaA = Math.exp(-1 / (attack * this.sampleRate));
        const alphaR = Math.exp(-1 / (release * this.sampleRate));

        /* iterate over all samples and smooth the squared signal
           (attack coefficient while rising, release coefficient while falling) */
        for (const s of samples) {
            const det = s * s;
            const alpha = det > power ? alphaA : alphaR;
            power = alpha * power + (1 - alpha) * det;
        }

        /* store the result as RMS amplitude */
        this.env[chan] = Math.sqrt(Math.max(power, floor));
    }

    /*  process one block of samples (first input to first output);
        always returns true  */
    process(inputs, outputs, parameters) {
        /* sanity check */
        const input = inputs[0];
        const output = outputs[0];
        if (!input || input.length === 0 || !output)
            return true;

        /* determine number of channels */
        const nCh = input.length;

        /* fetch parameters (k-rate: one value per block); attack and release
           are clamped to at least one sample period to avoid a division by zero */
        const thresholdDB = parameters["threshold"][0];
        const ratio = parameters["ratio"][0];
        const kneeDB = parameters["knee"][0];
        const attackS = Math.max(parameters["attack"][0], 1 / this.sampleRate);
        const releaseS = Math.max(parameters["release"][0], 1 / this.sampleRate);
        const makeupDB = parameters["makeup"][0];

        /* update envelope per channel */
        for (let ch = 0; ch < nCh; ch++)
            this.updateEnvelopeForChannel(ch, input[ch], attackS, releaseS);

        /* determine linear value from decibel makeup value */
        const makeUpLin = utils.dB2lin(makeupDB);

        /* iterate over all channels */
        this.reduction = 0;
        for (let ch = 0; ch < nCh; ch++) {
            const levelDB = utils.lin2dB(this.env[ch]);
            const gainDB = this.gainDBFor(levelDB, thresholdDB, ratio, kneeDB);
            const gainLin = utils.dB2lin(gainDB) * makeUpLin;

            /* on first channel, calculate reduction */
            if (ch === 0)
                this.reduction = Math.min(0, gainDB);

            /* apply gain change to channel
               (skip input channels without a corresponding output channel) */
            const inp = input[ch];
            const out = output[ch];
            if (!inp || !out)
                continue;
            for (let i = 0; i < inp.length; i++)
                out[i] = inp[i] * gainLin;
        }
        return true;
    }
}
|
|
153
|
+
/* register the new audio nodes */
|
|
154
|
+
registerProcessor("compressor", CompressorProcessor);
|
|
155
|
+
//# sourceMappingURL=speechflow-node-a2a-compressor-wt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"speechflow-node-a2a-compressor-wt.js","sourceRoot":"","sources":["../src/speechflow-node-a2a-compressor-wt.ts"],"names":[],"mappings":";AAAA;;;;EAIE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEF,0DAA2C;AAE3C,0CAA0C;AAC1C,MAAM,mBAAoB,SAAQ,qBAAqB;IACnD,sBAAsB;IACd,GAAG,GAAa,EAAE,CAAA;IAClB,UAAU,CAAQ;IACnB,SAAS,GAAG,CAAC,CAAA;IAEpB,2BAA2B;IAC3B,MAAM,KAAK,oBAAoB;QAC3B,OAAO;YACH,EAAE,IAAI,EAAE,WAAW,EAAG,YAAY,EAAE,CAAC,EAAE,EAAI,QAAQ,EAAE,CAAC,GAAG,EAAI,QAAQ,EAAE,CAAC,EAAI,cAAc,EAAE,QAAQ,EAAE,EAAE,OAAO;YAC/G,EAAE,IAAI,EAAE,OAAO,EAAO,YAAY,EAAE,GAAG,EAAI,QAAQ,EAAE,GAAG,EAAK,QAAQ,EAAE,EAAE,EAAG,cAAc,EAAE,QAAQ,EAAE,EAAE,oBAAoB;YAC5H,EAAE,IAAI,EAAE,QAAQ,EAAM,YAAY,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,EAAK,QAAQ,EAAE,CAAC,EAAI,cAAc,EAAE,QAAQ,EAAE,EAAE,UAAU;YAClH,EAAE,IAAI,EAAE,SAAS,EAAK,YAAY,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,EAAK,QAAQ,EAAE,CAAC,EAAI,cAAc,EAAE,QAAQ,EAAE,EAAE,UAAU;YAClH,EAAE,IAAI,EAAE,MAAM,EAAQ,YAAY,EAAE,GAAG,EAAI,QAAQ,EAAE,GAAG,EAAK,QAAQ,EAAE,EAAE,EAAG,cAAc,EAAE,QAAQ,EAAE,EAAE,KAAK;YAC7G,EAAE,IAAI,EAAE,QAAQ,EAAM,YAAY,EAAE,GAAG,EAAI,QAAQ,EAAE,CAAC,EAAE,EAAK,QAAQ,EAAE,EAAE,EAAG,cAAc,EAAE,QAAQ,EAAE,CAAE,KAAK;SAChH,CAAA;IACL,CAAC;IAED,sDAAsD;IACtD,YAAa,OAAY;QACrB,KAAK,EAAE,CAAA;QACP,MAAM,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC,gBAAgB,CAAA;QAC/C,IAAI,CAAC,UAAU,GAAG,UAAoB,CAAA;IAC1C,CAAC;IAED,iCAAiC;IACzB,SAAS,CAAE,OAAe,EAAE,WAAmB,EAAE,KAAa,EAAE,MAAc;QAClF,4CAA4C;QAC5C,IAAI,KAAK,IAAI,GAAG;YACZ,OAAO,CAAC,CAAA;QAEZ,4BAA4B;QAC5B,MAAM,QAAQ,GAAI,MAAM,GAAG,GAAG,CAAA;QAC9B,MAAM,QAAQ,GAAI,OAAO,GAAG,WAAW,CAAA;QACvC,MAAM,SAAS,GAAG,OAAO,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,CAAA;QAErD,0DAA0D;QAC1D,IAAI,QAAQ;YACR,OAAO,CAAC,CAAA;QAEZ,uBAAuB;QACvB,IAAI,MAAM,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YAC3B,MAAM,CAAC,GAAG,CAAC,OAAO,GAAG,WAAW,CAAC,GAAG,MAAM,CAAA;YAC1C,MAAM,WAAW,GAAG,CAAC,WAAW,GAAG,CAAC,OAAO,GAAG,WAAW,CAAC,GAAG,KAAK,CAAC,GAAG,OAAO,CAAA;YAC7E,OAAO,WAAW,GAAG,CAAC,GAAG,CAAC,CAAA;QAC9B,CAAC;QAED,8BAA8B;QAC9B,MAAM,SAAS,GAAG,WAAW,GAAG,CAAC,OAAO,GAAG,WAAW,CAAC,GAAG,KAAK,CAAA;QAE/D,8BAA8B;QAC9B,OAAO,S
AAS,GAAG,OAAO,CAAA;IAC9B,CAAC;IAED,uEAAuE;IAC/D,wBAAwB,CAC5B,IAAsB,EACtB,OAA4B,EAC5B,MAAsB,EACtB,OAAsB;QAEtB,gCAAgC;QAChC,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,SAAS;YAC5B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAA;QAC1B,IAAI,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAExB,6CAA6C;QAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAI,IAAI,CAAC,UAAU,CAAC,CAAC,CAAA;QACzD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAA;QAEzD,kDAAkD;QAClD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACtB,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YACrB,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,CAAA;YACjB,IAAI,GAAG,GAAG,GAAG;gBACT,GAAG,GAAG,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,GAAG,CAAA;;gBAEvC,GAAG,GAAG,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,GAAG,CAAA;QAC/C,CAAC;QACD,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAA;IACpD,CAAC;IAED,qCAAqC;IACrC,OAAO,CACH,MAA4B,EAC5B,OAA4B,EAC5B,UAAwC;QAExC,oBAAoB;QACpB,MAAM,KAAK,GAAI,MAAM,CAAC,CAAC,CAAC,CAAA;QACxB,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAA;QACzB,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM;YACvC,OAAO,IAAI,CAAA;QAEf,oCAAoC;QACpC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAA;QAExB,0DAA0D;QAC1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;gBACvB,SAAQ;YACZ,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;QAC3B,CAAC;QAED,wBAAwB;QACxB,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAA;QAC9C,MAAM,KAAK,GAAS,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAA;QAC1C,MAAM,MAAM,GAAQ,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;QACzC,MAAM,OAAO,GAAO,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAG,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,CAAA;QAC3E,MAAM,QAAQ,GAAM,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,CAAA;QAC3E,MAAM,QAAQ,GAAM,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAA
;QAE3C,mCAAmC;QACnC,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,GAAG,EAAE,EAAE,EAAE;YAC3B,IAAI,CAAC,wBAAwB,CAAC,EAAE,EAAE,KAAK,CAAC,EAAE,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAA;QAEnE,uDAAuD;QACvD,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;QAExC,iCAAiC;QACjC,IAAI,CAAC,SAAS,GAAG,CAAC,CAAA;QAClB,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,GAAG,EAAE,EAAE,EAAE,EAAE,CAAC;YAC9B,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;YAC1C,MAAM,MAAM,GAAI,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,CAAC,CAAA;YACnE,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,SAAS,CAAA;YAEhD,6CAA6C;YAC7C,IAAI,EAAE,KAAK,CAAC;gBACR,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,CAAA;YAExC,oCAAoC;YACpC,MAAM,GAAG,GAAG,KAAK,CAAC,EAAE,CAAC,CAAA;YACrB,MAAM,GAAG,GAAG,MAAM,CAAC,EAAE,CAAC,CAAA;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE;gBAC/B,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,OAAO,CAAA;QACjC,CAAC;QACD,OAAO,IAAI,CAAA;IACf,CAAC;CACJ;AAED,oCAAoC;AACpC,iBAAiB,CAAC,YAAY,EAAE,mBAAmB,CAAC,CAAA"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import SpeechFlowNode from "./speechflow-node";
|
|
2
|
+
/**
 * Type declaration of the SpeechFlow compressor node.
 *
 * Only the public surface is typed here: the constructor and the
 * asynchronous `open()` / `close()` methods. The private members are
 * listed without types.
 */
export default class SpeechFlowNodeCompressor extends SpeechFlowNode {
    /** Static name of the node class. */
    static name: string;
    private destroyed;
    private compressor;
    private bus;
    private intervalId;
    /**
     * @param id   - node identifier
     * @param cfg  - string-keyed configuration values
     * @param opts - string-keyed option values
     * @param args - positional arguments
     */
    constructor(id: string, cfg: { [key: string]: any }, opts: { [key: string]: any }, args: any[]);
    /** Opens the node; resolves without a value. */
    open(): Promise<void>;
    /** Closes the node; resolves without a value. */
    close(): Promise<void>;
}
|