speechflow 1.5.1 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +192 -171
- package/etc/claude.md +83 -46
- package/etc/speechflow.yaml +84 -84
- package/package.json +3 -3
- package/speechflow-cli/dst/speechflow-main-api.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-main-api.js +319 -0
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -0
- package/speechflow-cli/dst/speechflow-main-cli.d.ts +28 -0
- package/speechflow-cli/dst/speechflow-main-cli.js +271 -0
- package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -0
- package/speechflow-cli/dst/speechflow-main-config.d.ts +9 -0
- package/speechflow-cli/dst/speechflow-main-config.js +27 -0
- package/speechflow-cli/dst/speechflow-main-config.js.map +1 -0
- package/speechflow-cli/dst/speechflow-main-graph.d.ts +34 -0
- package/speechflow-cli/dst/speechflow-main-graph.js +367 -0
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -0
- package/speechflow-cli/dst/speechflow-main-nodes.d.ts +10 -0
- package/speechflow-cli/dst/speechflow-main-nodes.js +60 -0
- package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -0
- package/speechflow-cli/dst/speechflow-main-status.d.ts +11 -0
- package/speechflow-cli/dst/speechflow-main-status.js +60 -0
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -0
- package/speechflow-cli/dst/speechflow-main.d.ts +7 -0
- package/speechflow-cli/dst/speechflow-main.js +127 -0
- package/speechflow-cli/dst/speechflow-main.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +4 -4
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +8 -9
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +5 -5
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +6 -6
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +5 -5
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +7 -7
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +5 -5
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +7 -7
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +7 -7
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +7 -7
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-a2t-awstranscribe.d.ts → speechflow-node-a2t-amazon.d.ts} +1 -1
- package/speechflow-cli/dst/{speechflow-node-a2t-awstranscribe.js → speechflow-node-a2t-amazon.js} +11 -11
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +7 -7
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-a2t-openaitranscribe.d.ts → speechflow-node-a2t-openai.d.ts} +1 -1
- package/speechflow-cli/dst/{speechflow-node-a2t-openaitranscribe.js → speechflow-node-a2t-openai.js} +11 -11
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2a-awspolly.d.ts → speechflow-node-t2a-amazon.d.ts} +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2a-awspolly.js → speechflow-node-t2a-amazon.js} +9 -9
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +7 -7
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-awstranslate.d.ts → speechflow-node-t2t-amazon.d.ts} +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-awstranslate.js → speechflow-node-t2t-amazon.js} +7 -7
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-a2a-dynamics.d.ts → speechflow-node-t2t-modify.d.ts} +1 -5
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +111 -0
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +5 -5
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +3 -3
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +8 -8
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +50 -29
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +7 -7
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +10 -10
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-utils-audio-wt.js → speechflow-util-audio-wt.js} +1 -1
- package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-audio.d.ts +22 -0
- package/speechflow-cli/dst/speechflow-util-audio.js +251 -0
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-error.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-util-error.js +131 -0
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +68 -0
- package/speechflow-cli/dst/speechflow-util-queue.js +338 -0
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-util-stream.js +219 -0
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-webaudio-wt.js +124 -0
- package/speechflow-cli/dst/speechflow-util-webaudio-wt.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-utils-audio.js → speechflow-util-webaudio.js} +2 -2
- package/speechflow-cli/dst/speechflow-util-webaudio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util.d.ts +4 -0
- package/speechflow-cli/dst/speechflow-util.js +26 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -0
- package/speechflow-cli/dst/speechflow.js +3 -906
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +4 -1
- package/speechflow-cli/package.json +12 -11
- package/speechflow-cli/src/speechflow-main-api.ts +315 -0
- package/speechflow-cli/src/speechflow-main-cli.ts +259 -0
- package/speechflow-cli/src/speechflow-main-config.ts +17 -0
- package/speechflow-cli/src/speechflow-main-graph.ts +372 -0
- package/speechflow-cli/src/speechflow-main-nodes.ts +61 -0
- package/speechflow-cli/src/speechflow-main-status.ts +70 -0
- package/speechflow-cli/src/speechflow-main.ts +106 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -4
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +7 -8
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +5 -5
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +7 -8
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +7 -7
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +6 -6
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +4 -4
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +6 -6
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +4 -4
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +6 -6
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +6 -6
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +6 -6
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +2 -2
- package/speechflow-cli/src/{speechflow-node-a2t-awstranscribe.ts → speechflow-node-a2t-amazon.ts} +10 -10
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +7 -7
- package/speechflow-cli/src/{speechflow-node-a2t-openaitranscribe.ts → speechflow-node-a2t-openai.ts} +10 -10
- package/speechflow-cli/src/{speechflow-node-t2a-awspolly.ts → speechflow-node-t2a-amazon.ts} +7 -7
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +4 -4
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +6 -6
- package/speechflow-cli/src/{speechflow-node-t2t-awstranslate.ts → speechflow-node-t2t-amazon.ts} +5 -5
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +4 -4
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +7 -7
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +84 -0
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +4 -4
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +4 -4
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +4 -4
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +9 -9
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +4 -4
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -4
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +2 -2
- package/speechflow-cli/src/speechflow-node-xio-device.ts +7 -7
- package/speechflow-cli/src/speechflow-node-xio-file.ts +49 -28
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +7 -7
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
- package/speechflow-cli/src/{speechflow-utils-audio.ts → speechflow-util-audio.ts} +131 -1
- package/speechflow-cli/src/speechflow-util-error.ts +184 -0
- package/speechflow-cli/src/speechflow-util-queue.ts +320 -0
- package/speechflow-cli/src/speechflow-util-stream.ts +197 -0
- package/speechflow-cli/src/speechflow-util.ts +10 -0
- package/speechflow-cli/src/speechflow.ts +3 -947
- package/speechflow-ui-db/package.json +3 -3
- package/speechflow-ui-st/dst/app-font-fa-brands-400.woff2 +0 -0
- package/speechflow-ui-st/dst/app-font-fa-regular-400.woff2 +0 -0
- package/speechflow-ui-st/dst/app-font-fa-solid-900.woff2 +0 -0
- package/speechflow-ui-st/dst/app-font-fa-v4compatibility.woff2 +0 -0
- package/speechflow-ui-st/dst/index.css +2 -2
- package/speechflow-ui-st/dst/index.js +32 -33
- package/speechflow-ui-st/package.json +4 -4
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +0 -208
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +0 -312
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +0 -1
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +0 -1
- package/speechflow-cli/dst/speechflow-utils-audio.js.map +0 -1
- package/speechflow-cli/dst/speechflow-utils.d.ts +0 -108
- package/speechflow-cli/dst/speechflow-utils.js +0 -740
- package/speechflow-cli/dst/speechflow-utils.js.map +0 -1
- package/speechflow-cli/src/speechflow-utils.ts +0 -804
- /package/speechflow-cli/dst/{speechflow-node-a2a-dynamics-wt.d.ts → speechflow-util-audio-wt.d.ts} +0 -0
- /package/speechflow-cli/dst/{speechflow-utils-audio-wt.d.ts → speechflow-util-webaudio-wt.d.ts} +0 -0
- /package/speechflow-cli/dst/{speechflow-utils-audio.d.ts → speechflow-util-webaudio.d.ts} +0 -0
- /package/speechflow-cli/src/{speechflow-utils-audio-wt.ts → speechflow-util-audio-wt.ts} +0 -0
package/etc/claude.md
CHANGED
|
@@ -1,70 +1,107 @@
|
|
|
1
|
-
|
|
2
1
|
# CLAUDE.md
|
|
3
2
|
|
|
4
|
-
This file provides guidance to Claude Code (claude.ai/code) when working
|
|
5
|
-
with code in this repository.
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
6
4
|
|
|
7
5
|
## Project Overview
|
|
8
6
|
|
|
9
|
-
SpeechFlow is a command-line interface tool for establishing directed
|
|
10
|
-
data flow graphs of audio and text processing nodes. It enables flexible
|
|
11
|
-
speech processing tasks including capturing audio, text-to-speech,
|
|
12
|
-
speech-to-text, and speech-to-speech translation.
|
|
7
|
+
SpeechFlow is a command-line interface tool for establishing directed data flow graphs of audio and text processing nodes. It enables flexible speech processing tasks including capturing audio, text-to-speech, speech-to-text, and speech-to-speech translation.
|
|
13
8
|
|
|
14
9
|
## Architecture
|
|
15
10
|
|
|
16
|
-
SpeechFlow uses a modular node-based architecture:
|
|
11
|
+
SpeechFlow uses a modular node-based architecture with three main components:
|
|
17
12
|
|
|
18
|
-
- **Core
|
|
19
|
-
- **
|
|
20
|
-
- **
|
|
21
|
-
- **Web Interfaces**: Two Vue.js applications for dashboard and subtitle display
|
|
22
|
-
- **REST/WebSocket API**: External control interface for nodes
|
|
13
|
+
- **speechflow-cli**: Core TypeScript-based CLI engine that orchestrates processing flows
|
|
14
|
+
- **speechflow-ui-db**: Dashboard UI component for real-time visualization
|
|
15
|
+
- **speechflow-ui-st**: Subtitle UI component for displaying live subtitles
|
|
23
16
|
|
|
24
|
-
###
|
|
17
|
+
### Processing Node Categories
|
|
25
18
|
|
|
26
|
-
- **
|
|
27
|
-
|
|
28
|
-
- **
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
- Text-to-Text: `deepl`, `openai`, `ollama`, `transformers`, `subtitle`, `format`, `sentence`
|
|
33
|
-
- Text-to-Audio: `elevenlabs`, `kokoro`
|
|
34
|
-
- Any-to-Any: `filter`, `trace`
|
|
19
|
+
- **Input/Output (xio)**: file, device, websocket, mqtt
|
|
20
|
+
- **Audio-to-Audio (a2a)**: ffmpeg, wav, mute, meter, vad, gender, gain, filler, compressor, expander, rnnoise, speex
|
|
21
|
+
- **Audio-to-Text (a2t)**: deepgram, amazon, openai
|
|
22
|
+
- **Text-to-Text (t2t)**: deepl, google, amazon, openai, ollama, transformers, subtitle, format, sentence, modify
|
|
23
|
+
- **Text-to-Audio (t2a)**: elevenlabs, kokoro, amazon
|
|
24
|
+
- **Any-to-Any (x2x)**: filter, trace
|
|
35
25
|
|
|
36
26
|
## Development Commands
|
|
37
27
|
|
|
38
|
-
The project uses STX (Simple Task eXecutor) for build automation. Main commands:
|
|
39
|
-
|
|
40
|
-
### Core Project
|
|
41
|
-
|
|
42
28
|
```bash
|
|
43
|
-
|
|
44
|
-
npm start
|
|
45
|
-
npm start
|
|
46
|
-
npm start server # Run the main speechflow program
|
|
29
|
+
# Top-level commands (from root directory)
|
|
30
|
+
npm start lint # Lint all components (TypeScript, ESLint, Biome, Oxlint)
|
|
31
|
+
npm start build # Build all components (full production build)
|
|
47
32
|
npm start clean # Remove generated files
|
|
33
|
+
npm start upd # Update all NPM dependencies
|
|
34
|
+
|
|
35
|
+
# Component-specific development (from speechflow-cli/)
|
|
36
|
+
npm start dev # Multi-pane dashboard with linting, building, and server
|
|
37
|
+
npm start lint # Static code analysis
|
|
38
|
+
npm start build # Compile TypeScript to JavaScript
|
|
39
|
+
npm start server # Run the main speechflow program
|
|
40
|
+
npm start clean # Clean generated files
|
|
41
|
+
|
|
42
|
+
# Testing
|
|
43
|
+
npm start test # Run test configuration with sample pipeline
|
|
48
44
|
```
|
|
49
45
|
|
|
50
|
-
##
|
|
46
|
+
## Key Implementation Files
|
|
47
|
+
|
|
48
|
+
### Core Engine
|
|
49
|
+
- `speechflow-cli/src/speechflow.ts` - Main CLI entry point and orchestration
|
|
50
|
+
- `speechflow-cli/src/speechflow-node.ts` - Base node class with stream processing
|
|
51
|
+
- `speechflow-cli/src/speechflow-util.ts` - Utility functions and helpers
|
|
52
|
+
|
|
53
|
+
### Node Implementations
|
|
54
|
+
All node implementations follow the pattern `speechflow-node-{category}-{name}.ts` in `speechflow-cli/src/`.
|
|
51
55
|
|
|
52
|
-
|
|
53
|
-
-
|
|
54
|
-
-
|
|
55
|
-
-
|
|
56
|
+
### Stream Processing Architecture
|
|
57
|
+
- Uses Node.js object-mode streams with timestamp metadata
|
|
58
|
+
- Audio chunks: PCM format, 16-bit, 16kHz, mono
|
|
59
|
+
- Text chunks: Include timing information and metadata (gender, final/interim)
|
|
60
|
+
- All streams maintain chronological timestamps for synchronization
|
|
56
61
|
|
|
57
|
-
##
|
|
62
|
+
## API Integration
|
|
58
63
|
|
|
59
|
-
|
|
60
|
-
-
|
|
61
|
-
-
|
|
62
|
-
-
|
|
63
|
-
|
|
64
|
+
REST/WebSocket API available on port 8484 (configurable) for:
|
|
65
|
+
- External node control (muting, configuration)
|
|
66
|
+
- Real-time metrics (audio levels, text flow)
|
|
67
|
+
- Dashboard and UI connectivity
|
|
68
|
+
|
|
69
|
+
## Environment Configuration
|
|
70
|
+
|
|
71
|
+
Key environment variables for service integrations:
|
|
72
|
+
- `SPEECHFLOW_DEEPGRAM_KEY` - Deepgram API key
|
|
73
|
+
- `SPEECHFLOW_ELEVENLABS_KEY` - ElevenLabs API key
|
|
74
|
+
- `SPEECHFLOW_DEEPL_KEY` - DeepL API key
|
|
75
|
+
- `SPEECHFLOW_OPENAI_KEY` - OpenAI API key
|
|
76
|
+
- `SPEECHFLOW_GOOGLE_KEY` - Google Cloud API key
|
|
77
|
+
- `SPEECHFLOW_AWS_ACCESS_KEY_ID` - AWS access key
|
|
78
|
+
- `SPEECHFLOW_AWS_SECRET_ACCESS_KEY` - AWS secret key
|
|
79
|
+
- `SPEECHFLOW_AWS_REGION` - AWS region
|
|
80
|
+
- `SPEECHFLOW_DEVICE_MIC` - Microphone device identifier
|
|
81
|
+
- `SPEECHFLOW_DEVICE_SPK` - Speaker device identifier
|
|
82
|
+
|
|
83
|
+
## Flow Expression Language
|
|
84
|
+
|
|
85
|
+
Based on FlowLink with support for:
|
|
86
|
+
- Sequential pipelines: `node1 | node2 | node3`
|
|
87
|
+
- Parallel branches: `node1, node2, node3`
|
|
88
|
+
- Grouping: `{ node1 | node2 }`
|
|
89
|
+
- Parameters: `node(param1: value, param2: "string")`
|
|
90
|
+
- Environment variables: `env.VARIABLE_NAME`
|
|
91
|
+
- Command arguments: `argv.0`, `argv.1`
|
|
92
|
+
|
|
93
|
+
## Testing Approach
|
|
94
|
+
|
|
95
|
+
Run tests using the test configuration:
|
|
96
|
+
```bash
|
|
97
|
+
npm start test
|
|
98
|
+
```
|
|
64
99
|
|
|
65
|
-
|
|
100
|
+
This executes a sample pipeline defined in `etc/speechflow.yaml` with dashboard visualization.
|
|
66
101
|
|
|
67
|
-
|
|
68
|
-
processing graphs. Environment variables used for API keys (e.g.,
|
|
69
|
-
`SPEECHFLOW_DEEPGRAM_KEY`, `SPEECHFLOW_ELEVENLABS_KEY`).
|
|
102
|
+
## Important Patterns
|
|
70
103
|
|
|
104
|
+
1. **Stream Processing**: All nodes extend `SpeechFlowNode` and implement `process()` method for stream transformation
|
|
105
|
+
2. **Error Handling**: Nodes emit errors via stream events, captured and logged centrally
|
|
106
|
+
3. **Timestamp Preservation**: Audio/text chunks maintain timing for synchronization across pipeline
|
|
107
|
+
4. **Meta Information**: Chunks carry metadata (gender, final/interim status) for downstream filtering
|
package/etc/speechflow.yaml
CHANGED
|
@@ -4,74 +4,74 @@
|
|
|
4
4
|
|
|
5
5
|
# Capture and meter audio from microphone device into WAV audio file
|
|
6
6
|
capturing: |
|
|
7
|
-
device(device:
|
|
8
|
-
vad() |
|
|
9
|
-
meter(1000) |
|
|
10
|
-
wav(mode: "encode") |
|
|
11
|
-
file(path: "capture.wav", mode: "w", type: "audio")
|
|
7
|
+
xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") |
|
|
8
|
+
a2a-vad() |
|
|
9
|
+
a2a-meter(1000) |
|
|
10
|
+
a2a-wav(mode: "encode") |
|
|
11
|
+
xio-file(path: "capture.wav", mode: "w", type: "audio")
|
|
12
12
|
|
|
13
13
|
# Pass-through audio from microphone device to speaker
|
|
14
14
|
# device and in parallel record it to WAV audio file
|
|
15
15
|
pass-through: |
|
|
16
|
-
device(device:
|
|
17
|
-
wav(mode: "encode") |
|
|
18
|
-
file(path: "capture.wav", mode: "w", type: "audio"),
|
|
19
|
-
device(device:
|
|
16
|
+
xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") | {
|
|
17
|
+
a2a-wav(mode: "encode") |
|
|
18
|
+
xio-file(path: "capture.wav", mode: "w", type: "audio"),
|
|
19
|
+
xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
# Generate text file with German transcription of German MP3 audio file
|
|
23
23
|
transcription: |
|
|
24
|
-
file(path: argv.0, mode: "r", type: "audio") |
|
|
25
|
-
ffmpeg(src: "mp3", dst: "pcm") |
|
|
26
|
-
deepgram(language: "de") |
|
|
27
|
-
format(width: 80) |
|
|
28
|
-
file(path: argv.1, mode: "w", type: "text")
|
|
24
|
+
xio-file(path: argv.0, mode: "r", type: "audio") |
|
|
25
|
+
a2a-ffmpeg(src: "mp3", dst: "pcm") |
|
|
26
|
+
a2t-deepgram(language: "de") |
|
|
27
|
+
t2t-format(width: 80) |
|
|
28
|
+
xio-file(path: argv.1, mode: "w", type: "text")
|
|
29
29
|
|
|
30
30
|
# Generate WebVTT file with German closed captions of German MP3 audio file
|
|
31
31
|
captioning: |
|
|
32
|
-
file(path: argv.0, mode: "r", type: "audio") |
|
|
33
|
-
ffmpeg(src: "mp3", dst: "pcm") |
|
|
34
|
-
deepgram(language: "de") |
|
|
35
|
-
subtitle(format: "vtt") |
|
|
36
|
-
file(path: argv.1, mode: "w", type: "text")
|
|
32
|
+
xio-file(path: argv.0, mode: "r", type: "audio") |
|
|
33
|
+
a2a-ffmpeg(src: "mp3", dst: "pcm") |
|
|
34
|
+
a2t-deepgram(language: "de") |
|
|
35
|
+
t2t-subtitle(format: "vtt") |
|
|
36
|
+
xio-file(path: argv.1, mode: "w", type: "text")
|
|
37
37
|
|
|
38
38
|
# Generate WebVTT file with English subtitles of German MP3 audio file
|
|
39
39
|
subtitling: |
|
|
40
|
-
file(path: argv.0, mode: "r", type: "audio") |
|
|
41
|
-
ffmpeg(src: "mp3", dst: "pcm") |
|
|
42
|
-
deepgram(language: "de") |
|
|
43
|
-
deepl(src: "de", dst: "en") |
|
|
44
|
-
subtitle(format: "vtt") |
|
|
45
|
-
file(path: argv.1, mode: "w", type: "text")
|
|
40
|
+
xio-file(path: argv.0, mode: "r", type: "audio") |
|
|
41
|
+
a2a-ffmpeg(src: "mp3", dst: "pcm") |
|
|
42
|
+
a2t-deepgram(language: "de") |
|
|
43
|
+
t2t-deepl(src: "de", dst: "en") |
|
|
44
|
+
t2t-subtitle(format: "vtt") |
|
|
45
|
+
xio-file(path: argv.1, mode: "w", type: "text")
|
|
46
46
|
|
|
47
47
|
# Ad-Hoc text translation from German to English
|
|
48
48
|
translation: |
|
|
49
|
-
file(path: "-", mode: "r", type: "text") |
|
|
50
|
-
deepl(src: "de", dst: "en") |
|
|
51
|
-
file(path: "-", mode: "w", type: "text")
|
|
49
|
+
xio-file(path: "-", mode: "r", type: "text") |
|
|
50
|
+
t2t-deepl(src: "de", dst: "en") |
|
|
51
|
+
xio-file(path: "-", mode: "w", type: "text")
|
|
52
52
|
|
|
53
53
|
# Generate audio file with English voice for a text file
|
|
54
54
|
speaking: |
|
|
55
|
-
file(path: argv.0, mode: "r", type: "text") |
|
|
56
|
-
kokoro(language: "en") |
|
|
57
|
-
wav(mode: "encode") |
|
|
58
|
-
file(path: argv.1, mode: "w", type: "audio")
|
|
55
|
+
xio-file(path: argv.0, mode: "r", type: "text") |
|
|
56
|
+
t2a-kokoro(language: "en") |
|
|
57
|
+
a2a-wav(mode: "encode") |
|
|
58
|
+
xio-file(path: argv.1, mode: "w", type: "audio")
|
|
59
59
|
|
|
60
60
|
# Batch studio transcription from German to English,
|
|
61
61
|
# including the capturing of all involved inputs and outputs:
|
|
62
62
|
studio-transcription: |
|
|
63
|
-
file(path: argv.0, mode: "r", type: "audio") | {
|
|
64
|
-
ffmpeg(src: "mp3", dst: "pcm") | {
|
|
65
|
-
deepgram(language: "de") | {
|
|
66
|
-
format(width: 80) |
|
|
67
|
-
file(path: argv.1, mode: "w", type: "text"),
|
|
68
|
-
subtitle(format: "vtt") |
|
|
69
|
-
file(path: argv.2, mode: "w", type: "text"),
|
|
70
|
-
subtitle(format: "srt") |
|
|
71
|
-
file(path: argv.3, mode: "w", type: "text"),
|
|
72
|
-
elevenlabs(voice: "Mark", optimize: "quality", speed: 1.05, language: "en") |
|
|
73
|
-
wav(mode: "encode") |
|
|
74
|
-
file(path: argv.4, mode: "w", type: "audio")
|
|
63
|
+
xio-file(path: argv.0, mode: "r", type: "audio") | {
|
|
64
|
+
a2a-ffmpeg(src: "mp3", dst: "pcm") | {
|
|
65
|
+
a2t-deepgram(language: "de") | {
|
|
66
|
+
t2t-format(width: 80) |
|
|
67
|
+
xio-file(path: argv.1, mode: "w", type: "text"),
|
|
68
|
+
t2t-subtitle(format: "vtt") |
|
|
69
|
+
xio-file(path: argv.2, mode: "w", type: "text"),
|
|
70
|
+
t2t-subtitle(format: "srt") |
|
|
71
|
+
xio-file(path: argv.3, mode: "w", type: "text"),
|
|
72
|
+
t2a-elevenlabs(voice: "Mark", optimize: "quality", speed: 1.05, language: "en") |
|
|
73
|
+
a2a-wav(mode: "encode") |
|
|
74
|
+
xio-file(path: argv.4, mode: "w", type: "audio")
|
|
75
75
|
}
|
|
76
76
|
}
|
|
77
77
|
}
|
|
@@ -79,35 +79,35 @@ studio-transcription: |
|
|
|
79
79
|
# Real-time studio translation from German to English,
|
|
80
80
|
# including the capturing of all involved inputs and outputs:
|
|
81
81
|
studio-translation: |
|
|
82
|
-
device(device:
|
|
83
|
-
gender() | {
|
|
84
|
-
meter(interval: 250, dashboard: "meter1") |
|
|
85
|
-
wav(mode: "encode") |
|
|
86
|
-
file(path: "program-de.wav", mode: "w", type: "audio"),
|
|
87
|
-
deepgram(language: "de", key: interim: true) | {
|
|
88
|
-
trace(name: "trace1", type: "text", dashboard: "text1")
|
|
89
|
-
subtitle(format: "vtt", words: true) |
|
|
90
|
-
file(path: "program-de.vtt", mode: "w", type: "text"),
|
|
91
|
-
sentence() | {
|
|
92
|
-
trace(name: "trace2", type: "text", notify: true, dashboard: "text2") |
|
|
93
|
-
format(width: 80) |
|
|
94
|
-
file(path: "program-de.txt", mode: "w", type: "text"),
|
|
95
|
-
deepl(src: "de", dst: "en") | {
|
|
96
|
-
trace(name: "trace3", type: "text", dashboard: "text3") | {
|
|
97
|
-
format(width: 80) |
|
|
98
|
-
file(path: "program-en.txt", mode: "w", type: "text"),
|
|
99
|
-
subtitle(format: "vtt", words: false) |
|
|
100
|
-
file(path: "program-en.vtt", mode: "w", type: "text"),
|
|
82
|
+
xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") | {
|
|
83
|
+
a2a-gender() | {
|
|
84
|
+
a2a-meter(interval: 250, dashboard: "meter1") |
|
|
85
|
+
a2a-wav(mode: "encode") |
|
|
86
|
+
xio-file(path: "program-de.wav", mode: "w", type: "audio"),
|
|
87
|
+
a2t-deepgram(language: "de", key: env.SPEECHFLOW_DEEPGRAM_KEY, interim: true) | {
|
|
88
|
+
x2x-trace(name: "trace1", type: "text", dashboard: "text1") |
|
|
89
|
+
t2t-subtitle(format: "vtt", words: true) |
|
|
90
|
+
xio-file(path: "program-de.vtt", mode: "w", type: "text"),
|
|
91
|
+
t2t-sentence() | {
|
|
92
|
+
x2x-trace(name: "trace2", type: "text", notify: true, dashboard: "text2") |
|
|
93
|
+
t2t-format(width: 80) |
|
|
94
|
+
xio-file(path: "program-de.txt", mode: "w", type: "text"),
|
|
95
|
+
t2t-deepl(src: "de", dst: "en") | {
|
|
96
|
+
x2x-trace(name: "trace3", type: "text", dashboard: "text3") | {
|
|
97
|
+
t2t-format(width: 80) |
|
|
98
|
+
xio-file(path: "program-en.txt", mode: "w", type: "text"),
|
|
99
|
+
t2t-subtitle(format: "vtt", words: false) |
|
|
100
|
+
xio-file(path: "program-en.vtt", mode: "w", type: "text"),
|
|
101
101
|
{
|
|
102
|
-
filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
|
|
103
|
-
elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
|
|
104
|
-
filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
|
|
105
|
-
elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
|
|
102
|
+
x2x-filter(name: "S2T-male", type: "text", var: "meta:gender", op: "==", val: "male") |
|
|
103
|
+
t2a-elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en"),
|
|
104
|
+
x2x-filter(name: "S2T-female", type: "text", var: "meta:gender", op: "==", val: "female") |
|
|
105
|
+
t2a-elevenlabs(voice: "Brittney", optimize: "latency", speed: 1.05, language: "en")
|
|
106
106
|
} | {
|
|
107
|
-
meter(interval: 250, dashboard: "meter2"),
|
|
108
|
-
wav(mode: "encode") |
|
|
109
|
-
file(path: "program-en.wav", mode: "w", type: "audio"),
|
|
110
|
-
device(device:
|
|
107
|
+
a2a-meter(interval: 250, dashboard: "meter2"),
|
|
108
|
+
a2a-wav(mode: "encode") |
|
|
109
|
+
xio-file(path: "program-en.wav", mode: "w", type: "audio"),
|
|
110
|
+
xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
|
|
111
111
|
}
|
|
112
112
|
}
|
|
113
113
|
}
|
|
@@ -118,18 +118,18 @@ studio-translation: |
|
|
|
118
118
|
|
|
119
119
|
# Test-drive for development
|
|
120
120
|
test: |
|
|
121
|
-
device(device:
|
|
122
|
-
meter(interval: 50, dashboard: "meter1") |
|
|
123
|
-
deepgram(language: "de", model: "nova-2", interim: true) |
|
|
124
|
-
trace(type: "text", dashboard: "text1") | {
|
|
125
|
-
subtitle(mode: "render", addr: "127.0.0.1", port: 8585),
|
|
126
|
-
filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
|
|
127
|
-
sentence() |
|
|
128
|
-
trace(type: "text", dashboard: "text2") |
|
|
129
|
-
deepl(src: "de", dst: "en") |
|
|
130
|
-
trace(type: "text", dashboard: "text3") |
|
|
131
|
-
elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
|
|
132
|
-
meter(interval: 50, dashboard: "meter2") |
|
|
133
|
-
device(device:
|
|
121
|
+
xio-device(device: env.SPEECHFLOW_DEVICE_MIC, mode: "r") |
|
|
122
|
+
a2a-meter(interval: 50, dashboard: "meter1") |
|
|
123
|
+
a2t-deepgram(language: "de", model: "nova-2", interim: true) |
|
|
124
|
+
x2x-trace(type: "text", dashboard: "text1") | {
|
|
125
|
+
t2t-subtitle(mode: "render", addr: "127.0.0.1", port: 8585),
|
|
126
|
+
x2x-filter(name: "final", type: "text", var: "kind", op: "==", val: "final") |
|
|
127
|
+
t2t-sentence() |
|
|
128
|
+
x2x-trace(type: "text", dashboard: "text2") |
|
|
129
|
+
t2t-deepl(src: "de", dst: "en") |
|
|
130
|
+
x2x-trace(type: "text", dashboard: "text3") |
|
|
131
|
+
t2a-elevenlabs(voice: "Mark", optimize: "latency", speed: 1.05, language: "en") |
|
|
132
|
+
a2a-meter(interval: 50, dashboard: "meter2") |
|
|
133
|
+
xio-device(device: env.SPEECHFLOW_DEVICE_SPK, mode: "w")
|
|
134
134
|
}
|
|
135
135
|
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "speechflow",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"x-stdver": "1.
|
|
5
|
-
"x-release": "2025-09-
|
|
3
|
+
"version": "1.6.1",
|
|
4
|
+
"x-stdver": "1.6.1-GA",
|
|
5
|
+
"x-release": "2025-09-06",
|
|
6
6
|
"homepage": "https://github.com/rse/speechflow",
|
|
7
7
|
"description": "Speech Processing Flow Graph",
|
|
8
8
|
"keywords": [ "speech", "audio", "flow", "graph" ],
|
package/speechflow-cli/dst/speechflow-main-api.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import CLIio from "cli-io";
|
|
2
|
+
import { CLIOptions } from "./speechflow-main-cli";
|
|
3
|
+
import { NodeGraph } from "./speechflow-main-graph";
|
|
4
|
+
export declare class APIServer {
|
|
5
|
+
private cli;
|
|
6
|
+
private wsPeers;
|
|
7
|
+
private hapi;
|
|
8
|
+
private sendOSC;
|
|
9
|
+
constructor(cli: CLIio);
|
|
10
|
+
start(args: CLIOptions, graph: NodeGraph): Promise<void>;
|
|
11
|
+
stop(args: CLIOptions): Promise<void>;
|
|
12
|
+
}
|