speechflow 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -0
- package/dst/speechflow-node-deepgram.d.ts +10 -0
- package/dst/speechflow-node-deepgram.js +44 -23
- package/dst/speechflow-node-deepl.d.ts +10 -0
- package/dst/speechflow-node-deepl.js +30 -12
- package/dst/speechflow-node-device.d.ts +11 -0
- package/dst/speechflow-node-device.js +73 -14
- package/dst/speechflow-node-elevenlabs.d.ts +10 -0
- package/dst/speechflow-node-elevenlabs.js +14 -2
- package/dst/speechflow-node-ffmpeg.d.ts +11 -0
- package/dst/speechflow-node-ffmpeg.js +114 -0
- package/dst/speechflow-node-file.d.ts +9 -0
- package/dst/speechflow-node-file.js +71 -13
- package/dst/speechflow-node-gemma.d.ts +11 -0
- package/dst/speechflow-node-gemma.js +152 -0
- package/dst/speechflow-node-websocket.d.ts +11 -0
- package/dst/speechflow-node-websocket.js +34 -6
- package/dst/speechflow-node.d.ts +38 -0
- package/dst/speechflow-node.js +28 -10
- package/dst/speechflow.d.ts +1 -0
- package/dst/speechflow.js +128 -43
- package/etc/tsconfig.json +2 -0
- package/package.json +24 -10
- package/src/speechflow-node-deepgram.ts +55 -24
- package/src/speechflow-node-deepl.ts +38 -16
- package/src/speechflow-node-device.ts +88 -14
- package/src/speechflow-node-elevenlabs.ts +19 -2
- package/src/speechflow-node-ffmpeg.ts +122 -0
- package/src/speechflow-node-file.ts +76 -14
- package/src/speechflow-node-gemma.ts +169 -0
- package/src/speechflow-node-websocket.ts +52 -13
- package/src/speechflow-node.ts +43 -21
- package/src/speechflow.ts +142 -46
- package/dst/speechflow-util.js +0 -37
- package/src/speechflow-util.ts +0 -36
package/README.md
CHANGED
@@ -141,6 +141,20 @@ Currently **SpeechFlow** provides the following processing nodes:
   | **device** | 0 | *none* | `/^(.+?):(.+)$/` |
   | **mode** | 1 | "rw" | `/^(?:r\|w\|rw)$/` |
 
+- Node: **ffmpeg**<br/>
+  Purpose: **FFmpeg audio format conversion**<br/>
+  Example: `ffmpeg(src: "pcm", dst: "mp3")`
+
+  | Port | Payload |
+  | ------- | ----------- |
+  | input | audio |
+  | output | audio |
+
+  | Parameter | Position | Default | Requirement |
+  | ----------- | --------- | -------- | ------------------ |
+  | **src** | 0 | "pcm" | `/^(?:pcm\|wav\|mp3\|opus)$/` |
+  | **dst** | 1 | "wav" | `/^(?:pcm\|wav\|mp3\|opus)$/` |
+
 - Node: **deepgram**<br/>
   Purpose: **Deepgram Speech-to-Text conversion**<br/>
   Example: `deepgram(language: "de")`<br/>
@@ -174,6 +188,22 @@ Currently **SpeechFlow** provides the following processing nodes:
   | **src** | 0 | "de" | `/^(?:de\|en-US)$/` |
   | **dst** | 1 | "en-US" | `/^(?:de\|en-US)$/` |
 
+- Node: **gemma**<br/>
+  Purpose: **Google Gemma Text-to-Text translation**<br/>
+  Example: `gemma(src: "de", dst: "en")`<br/>
+  Notice; this node requires the Ollama API!
+
+  | Port | Payload |
+  | ------- | ----------- |
+  | input | text |
+  | output | text |
+
+  | Parameter | Position | Default | Requirement |
+  | ------------ | --------- | -------- | ------------------ |
+  | **url** | *none* | "http://127.0.0.1:11434" | `/^https?:\/\/.+?:\d+$/` |
+  | **src** | 0 | "de" | `/^(?:de\|en)$/` |
+  | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
+
 - Node: **elevenlabs**<br/>
   Purpose: **ElevenLabs Text-to-Speech conversion**<br/>
   Example: `elevenlabs(language: "en")`<br/>
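The parameter tables above correspond directly to the `configure()` declarations in the compiled sources further down in this diff. As a rough sketch only (the standalone helper `validateFFmpegFormats` is hypothetical and not part of the package), the documented requirements for the new **ffmpeg** node boil down to:

```ts
/* hypothetical sketch: check ffmpeg node parameters against the
   requirement patterns documented in the README table above */
const formatPattern = /^(?:pcm|wav|mp3|opus)$/

function validateFFmpegFormats (src = "pcm", dst = "wav"): void {
    if (!formatPattern.test(src))
        throw new Error(`invalid source format "${src}"`)
    if (!formatPattern.test(dst))
        throw new Error(`invalid destination format "${dst}"`)
    if (src === dst)
        throw new Error("source and destination formats should not be the same")
}

validateFFmpegFormats("pcm", "mp3")  /* the README example: ffmpeg(src: "pcm", dst: "mp3") */
```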
package/dst/speechflow-node-deepgram.d.ts
ADDED
@@ -0,0 +1,10 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
+    static name: string;
+    private dg;
+    constructor(id: string, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    open(): Promise<void>;
+    close(): Promise<void>;
+}
package/dst/speechflow-node-deepgram.js
CHANGED
@@ -41,30 +41,41 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+/* standard dependencies */
 const node_events_1 = require("node:events");
+/* external dependencies */
 const node_stream_1 = __importDefault(require("node:stream"));
 const Deepgram = __importStar(require("@deepgram/sdk"));
+/* internal dependencies */
 const speechflow_node_1 = __importDefault(require("./speechflow-node"));
-
+/* SpeechFlow node for Deepgram speech-to-text conversion */
+class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
+    /* declare official node name */
+    static name = "deepgram";
+    /* internal state */
     dg = null;
+    /* construct node */
     constructor(id, opts, args) {
         super(id, opts, args);
+        /* declare node configuration parameters */
         this.configure({
             key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
-            model: { type: "string", val: "nova-
+            model: { type: "string", val: "nova-3", pos: 0 },
             version: { type: "string", val: "latest", pos: 1 },
-            language: { type: "string", val: "
+            language: { type: "string", val: "multi", pos: 2 }
         });
-
-    async open() {
+        /* declare node input/output format */
         this.input = "audio";
         this.output = "text";
-
+    }
+    /* open node */
+    async open() {
         /* sanity check situation */
         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
             throw new Error("Deepgram node currently supports PCM-S16LE audio only");
-        /*
+        /* create queue for results */
         const queue = new node_events_1.EventEmitter();
+        /* connect to Deepgram API */
         const deepgram = Deepgram.createClient(this.params.key);
         this.dg = deepgram.listen.live({
             model: this.params.model,
@@ -74,7 +85,7 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
             sample_rate: this.config.audioSampleRate,
             encoding: "linear16",
             multichannel: false,
-
+            endpointing: 10,
             interim_results: false,
             smart_format: true,
             punctuate: true,
@@ -83,26 +94,31 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
             numerals: true,
             paragraphs: true,
             profanity_filter: true,
-            utterances: false
-        });
-        await new Promise((resolve) => {
-            this.dg.on(Deepgram.LiveTranscriptionEvents.Open, () => {
-                this.log("info", "Deepgram: connection open");
-                resolve(true);
-            });
-        });
-        /* hooks onto Deepgram API events */
-        this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
-            this.log("info", "Deepgram: connection close");
+            utterances: false
         });
+        /* hook onto Deepgram API events */
         this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
             const text = data.channel?.alternatives[0].transcript ?? "";
             if (text === "")
                 return;
             queue.emit("text", text);
         });
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
+            this.log("info", "Deepgram: metadata received");
+        });
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
+            this.log("info", "Deepgram: connection close");
+        });
         this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error) => {
-            this.log("error", `Deepgram: ${error}`);
+            this.log("error", `Deepgram: ${error.message}`);
+            this.emit("error");
+        });
+        /* wait for Deepgram API to be available */
+        await new Promise((resolve) => {
+            this.dg.once(Deepgram.LiveTranscriptionEvents.Open, () => {
+                this.log("info", "Deepgram: connection open");
+                resolve(true);
+            });
         });
         /* provide Duplex stream and internally attach to Deepgram API */
         const dg = this.dg;
@@ -117,19 +133,24 @@ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
             },
             read(size) {
                 queue.once("text", (text) => {
-
-                    this.push(text);
+                    this.push(text);
                 });
+            },
+            final(callback) {
+                dg.requestClose();
             }
         });
     }
+    /* close node */
     async close() {
+        /* close stream */
         if (this.stream !== null) {
             this.stream.destroy();
             this.stream = null;
         }
+        /* shutdown Deepgram API */
         if (this.dg !== null)
             this.dg.requestClose();
     }
 }
-exports.default =
+exports.default = SpeechFlowNodeDeepgram;
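The reworked `open()` above now registers the `Transcript`, `Metadata`, `Close`, and `Error` handlers first and only afterwards waits for the `Open` event, so nothing emitted right after the connection is established can be missed. A minimal standalone sketch of that pattern with `@deepgram/sdk` (the option values mirror the node's defaults; the 48000 Hz sample rate is just an example, the real node takes it from its configuration):

```ts
import * as Deepgram from "@deepgram/sdk"

/* sketch: open a Deepgram live-transcription connection the way the
   updated node does -- attach handlers first, then await the Open event */
async function openLiveTranscription (key: string) {
    const client = Deepgram.createClient(key)
    const dg = client.listen.live({
        model: "nova-3", language: "multi",
        encoding: "linear16", sample_rate: 48000,
        interim_results: false, smart_format: true
    })
    dg.on(Deepgram.LiveTranscriptionEvents.Transcript, (data) => {
        const text = data.channel?.alternatives[0].transcript ?? ""
        if (text !== "")
            console.log(text)
    })
    dg.on(Deepgram.LiveTranscriptionEvents.Error, (error) => {
        console.error(error.message)
    })
    /* only now wait for the connection to become available */
    await new Promise<void>((resolve) => {
        dg.once(Deepgram.LiveTranscriptionEvents.Open, () => { resolve() })
    })
    return dg  /* later: dg.send(audioChunk), dg.requestClose() */
}
```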
package/dst/speechflow-node-deepl.d.ts
ADDED
@@ -0,0 +1,10 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
+    static name: string;
+    private deepl;
+    constructor(id: string, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    open(): Promise<void>;
+    close(): Promise<void>;
+}
package/dst/speechflow-node-deepl.js
CHANGED
@@ -41,34 +41,49 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+/* standard dependencies */
 const node_stream_1 = __importDefault(require("node:stream"));
 const node_events_1 = require("node:events");
-
+/* external dependencies */
 const DeepL = __importStar(require("deepl-node"));
+/* internal dependencies */
+const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+/* SpeechFlow node for DeepL text-to-text translations */
 class SpeechFlowNodeDeepL extends speechflow_node_1.default {
-
+    /* declare official node name */
+    static name = "deepl";
+    /* internal state */
+    deepl = null;
+    /* construct node */
     constructor(id, opts, args) {
         super(id, opts, args);
-
-        this.output = "text";
-        this.stream = null;
+        /* declare node configuration parameters */
         this.configure({
             key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPL },
             src: { type: "string", pos: 0, val: "de", match: /^(?:de|en-US)$/ },
-            dst: { type: "string", pos: 1, val: "en-US", match: /^(?:de|en-US)$/ }
+            dst: { type: "string", pos: 1, val: "en-US", match: /^(?:de|en-US)$/ },
+            optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
         });
+        /* declare node input/output format */
+        this.input = "text";
+        this.output = "text";
     }
+    /* open node */
     async open() {
         /* instantiate DeepL API SDK */
-        this.
+        this.deepl = new DeepL.Translator(this.params.key);
         /* provide text-to-text translation */
         const translate = async (text) => {
-            const result = await this.
-                splitSentences: "off"
+            const result = await this.deepl.translateText(text, this.params.src, this.params.dst, {
+                splitSentences: "off",
+                modelType: this.params.optimize === "latency" ?
+                    "latency_optimized" : "prefer_quality_optimized",
+                preserveFormatting: true,
+                formality: "prefer_more"
             });
             return (result?.text ?? text);
         };
-        /* establish a duplex stream and connect it to
+        /* establish a duplex stream and connect it to DeepL translation */
         const queue = new node_events_1.EventEmitter();
         this.stream = new node_stream_1.default.Duplex({
             write(chunk, encoding, callback) {
@@ -93,13 +108,16 @@ class SpeechFlowNodeDeepL extends speechflow_node_1.default {
             }
         });
     }
+    /* open node */
     async close() {
+        /* close stream */
         if (this.stream !== null) {
             this.stream.destroy();
             this.stream = null;
         }
-
-
+        /* shutdown DeepL API */
+        if (this.deepl !== null)
+            this.deepl = null;
     }
 }
 exports.default = SpeechFlowNodeDeepL;
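The new `optimize` parameter exposes DeepL's model selection: `"latency"` maps to `modelType: "latency_optimized"`, `"quality"` to `"prefer_quality_optimized"`. A minimal sketch of the underlying `deepl-node` call as used above (the wrapper function name and the hard-coded language pair are illustrative only):

```ts
import * as DeepL from "deepl-node"

/* sketch: translate one chunk of text the way the updated node does */
async function translateChunk (key: string, text: string,
    optimize: "latency" | "quality" = "latency"): Promise<string> {
    const translator = new DeepL.Translator(key)
    const result = await translator.translateText(text, "de", "en-US", {
        splitSentences: "off",
        modelType: optimize === "latency"
            ? "latency_optimized" : "prefer_quality_optimized",
        preserveFormatting: true,
        formality: "prefer_more"
    })
    return result?.text ?? text
}
```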
package/dst/speechflow-node-device.d.ts
ADDED
@@ -0,0 +1,11 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeDevice extends SpeechFlowNode {
+    static name: string;
+    private io;
+    constructor(id: string, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    private audioDeviceFromURL;
+    open(): Promise<void>;
+    close(): Promise<void>;
+}
package/dst/speechflow-node-device.js
CHANGED
@@ -8,34 +8,85 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+/* external dependencies */
 const naudiodon_1 = __importDefault(require("@gpeng/naudiodon"));
+/* internal dependencies */
 const speechflow_node_1 = __importDefault(require("./speechflow-node"));
-
+/* SpeechFlow node for device access */
 class SpeechFlowNodeDevice extends speechflow_node_1.default {
+    /* declare official node name */
+    static name = "device";
+    /* internal state */
     io = null;
+    /* construct node */
     constructor(id, opts, args) {
         super(id, opts, args);
+        /* declare node configuration parameters */
         this.configure({
             device: { type: "string", pos: 0, match: /^(.+?):(.+)$/ },
             mode: { type: "string", pos: 1, val: "rw", match: /^(?:r|w|rw)$/ }
         });
+        /* declare node input/output format */
+        if (this.params.mode === "rw") {
+            this.input = "audio";
+            this.output = "audio";
+        }
+        else if (this.params.mode === "r") {
+            this.input = "none";
+            this.output = "audio";
+        }
+        else if (this.params.mode === "w") {
+            this.input = "audio";
+            this.output = "none";
+        }
     }
+    /* INTERNAL: utility function for finding audio device by pseudo-URL notation */
+    audioDeviceFromURL(mode, url) {
+        /* parse URL */
+        const m = url.match(/^(.+?):(.+)$/);
+        if (m === null)
+            throw new Error(`invalid audio device URL "${url}"`);
+        const [, type, name] = m;
+        /* determine audio API */
+        const apis = naudiodon_1.default.getHostAPIs();
+        const api = apis.HostAPIs.find((api) => api.type.toLowerCase() === type.toLowerCase());
+        if (!api)
+            throw new Error(`invalid audio API type "${type}"`);
+        /* determine device of audio API */
+        const devices = naudiodon_1.default.getDevices();
+        const device = devices.find((device) => {
+            return (((mode === "r" && device.maxInputChannels > 0)
+                || (mode === "w" && device.maxOutputChannels > 0)
+                || (mode === "rw" && device.maxInputChannels > 0 && device.maxOutputChannels > 0)
+                || (mode === "any" && (device.maxInputChannels > 0 || device.maxOutputChannels > 0)))
+                && device.name.match(name)
+                && device.hostAPIName === api.name);
+        });
+        if (!device)
+            throw new Error(`invalid audio device "${name}" (of audio API type "${type}")`);
+        return device;
+    }
+    /* open node */
     async open() {
         /* determine device */
-        const device =
+        const device = this.audioDeviceFromURL(this.params.mode, this.params.device);
         /* sanity check sample rate compatibility
            (we still do not resample in input/output for simplification reasons) */
         if (device.defaultSampleRate !== this.config.audioSampleRate)
-            throw new Error(`device
+            throw new Error(`audio device sample rate ${device.defaultSampleRate} is ` +
                 `incompatible with required sample rate ${this.config.audioSampleRate}`);
         /* establish device connection
            Notice: "naudion" actually implements Stream.{Readable,Writable,Duplex}, but
           declares just its sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
           so it is correct to cast it back to Stream.{Readable,Writable,Duplex} */
-
+        /* FIXME: the underlying PortAudio outputs verbose/debugging messages */
+        if (this.params.mode === "rw") {
+            /* input/output device */
+            if (device.maxInputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any input channels (required by read/write mode)`);
+            if (device.maxOutputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any output channels (required by read/write mode)`);
             this.log("info", `resolved "${this.params.device}" to duplex device "${device.id}"`);
-            this.input = "audio";
-            this.output = "audio";
             this.io = naudiodon_1.default.AudioIO({
                 inOptions: {
                     deviceId: device.id,
@@ -52,10 +103,11 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
             });
             this.stream = this.io;
         }
-        else if (
+        else if (this.params.mode === "r") {
+            /* input device */
+            if (device.maxInputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any input channels (required by read mode)`);
             this.log("info", `resolved "${this.params.device}" to input device "${device.id}"`);
-            this.input = "none";
-            this.output = "audio";
             this.io = naudiodon_1.default.AudioIO({
                 inOptions: {
                     deviceId: device.id,
@@ -66,10 +118,11 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
             });
             this.stream = this.io;
         }
-        else if (
+        else if (this.params.mode === "w") {
+            /* output device */
+            if (device.maxOutputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any output channels (required by write mode)`);
             this.log("info", `resolved "${this.params.device}" to output device "${device.id}"`);
-            this.input = "audio";
-            this.output = "none";
             this.io = naudiodon_1.default.AudioIO({
                 outOptions: {
                     deviceId: device.id,
@@ -82,14 +135,20 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
         }
         else
            throw new Error(`device "${device.id}" does not have any input or output channels`);
-        /* pass-through errors */
+        /* pass-through PortAudio errors */
         this.io.on("error", (err) => {
             this.emit("error", err);
         });
+        /* start PortAudio */
+        this.io.start();
     }
+    /* close node */
     async close() {
-
+        /* shutdown PortAudio */
+        if (this.io !== null) {
             this.io.quit();
+            this.io = null;
+        }
     }
 }
 exports.default = SpeechFlowNodeDevice;
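The new `audioDeviceFromURL()` helper resolves the `device` parameter's pseudo-URL notation `"<api-type>:<device-name>"` via `@gpeng/naudiodon`, matching the name part as a regular expression against the PortAudio device names of the selected host API. A small sketch for enumerating the values such a URL can refer to, using only the calls visible above (the output format is illustrative):

```ts
import naudiodon from "@gpeng/naudiodon"

/* sketch: list host APIs and devices so that a suitable
   "<api-type>:<device-name>" pseudo-URL can be chosen for the device node */
function listAudioDevices (): void {
    const apis = naudiodon.getHostAPIs()
    for (const api of apis.HostAPIs)
        console.log(`API: type="${api.type}" name="${api.name}"`)
    for (const device of naudiodon.getDevices())
        console.log(`Device: "${device.name}" (host API: ${device.hostAPIName}, ` +
            `in: ${device.maxInputChannels}, out: ${device.maxOutputChannels}, ` +
            `rate: ${device.defaultSampleRate})`)
}
```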
package/dst/speechflow-node-elevenlabs.d.ts
ADDED
@@ -0,0 +1,10 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
+    static name: string;
+    private elevenlabs;
+    constructor(id: string, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    open(): Promise<void>;
+    close(): Promise<void>;
+}
package/dst/speechflow-node-elevenlabs.js
CHANGED
@@ -41,10 +41,13 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+/* standard dependencies */
 const node_stream_1 = __importDefault(require("node:stream"));
 const node_events_1 = require("node:events");
+/* external dependencies */
 const ElevenLabs = __importStar(require("elevenlabs"));
 const get_stream_1 = require("get-stream");
+/* internal dependencies */
 const speechflow_node_1 = __importDefault(require("./speechflow-node"));
 /*
 const elevenlabsVoices = {
@@ -61,18 +64,25 @@ const elevenlabsVoices = {
 }
 */
 class SpeechFlowNodeElevenlabs extends speechflow_node_1.default {
+    /* declare official node name */
+    static name = "elevenlabs";
+    /* internal state */
     elevenlabs = null;
+    /* construct node */
     constructor(id, opts, args) {
         super(id, opts, args);
+        /* declare node configuration parameters */
         this.configure({
             key: { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
             voice: { type: "string", val: "Brian", pos: 0 },
             language: { type: "string", val: "de", pos: 1 }
         });
-
-    async open() {
+        /* declare node input/output format */
         this.input = "text";
         this.output = "audio";
+    }
+    /* open node */
+    async open() {
         this.elevenlabs = new ElevenLabs.ElevenLabsClient({
             apiKey: this.params.key
         });
@@ -121,7 +131,9 @@ class SpeechFlowNodeElevenlabs extends speechflow_node_1.default {
             }
         });
     }
+    /* close node */
     async close() {
+        /* destroy stream */
         if (this.stream !== null) {
             this.stream.destroy();
             this.stream = null;
package/dst/speechflow-node-ffmpeg.d.ts
ADDED
@@ -0,0 +1,11 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
+    static name: string;
+    private ffmpegBinary;
+    private ffmpeg;
+    constructor(id: string, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    open(): Promise<void>;
+    close(): Promise<void>;
+}
package/dst/speechflow-node-ffmpeg.js
ADDED
@@ -0,0 +1,114 @@
+"use strict";
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+/* standard dependencies */
+const node_stream_1 = __importDefault(require("node:stream"));
+/* external dependencies */
+const ffmpeg_1 = __importDefault(require("@rse/ffmpeg"));
+const ffmpeg_stream_1 = require("ffmpeg-stream");
+/* internal dependencies */
+const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+/* SpeechFlow node for FFmpeg */
+class SpeechFlowNodeFFmpeg extends speechflow_node_1.default {
+    /* declare official node name */
+    static name = "ffmpeg";
+    /* internal state */
+    ffmpegBinary = ffmpeg_1.default.supported ? ffmpeg_1.default.binary : "ffmpeg";
+    ffmpeg = null;
+    /* construct node */
+    constructor(id, opts, args) {
+        super(id, opts, args);
+        /* declare node configuration parameters */
+        this.configure({
+            src: { type: "string", pos: 0, val: "pcm", match: /^(?:pcm|wav|mp3|opus)$/ },
+            dst: { type: "string", pos: 1, val: "wav", match: /^(?:pcm|wav|mp3|opus)$/ }
+        });
+        /* declare node input/output format */
+        this.input = "audio";
+        this.output = "audio";
+    }
+    /* open node */
+    async open() {
+        /* sanity check situation */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination formats should not be the same");
+        /* instantiate FFmpeg sub-process */
+        this.ffmpeg = new ffmpeg_stream_1.Converter(this.ffmpegBinary);
+        const streamInput = this.ffmpeg.createInputStream({
+            /* FFmpeg input options */
+            "fflags": "nobuffer",
+            "flags": "low_delay",
+            "probesize": 32,
+            "analyzeduration": 0,
+            ...(this.params.src === "pcm" ? {
+                "f": "s16le",
+                "ar": this.config.audioSampleRate,
+                "ac": this.config.audioChannels
+            } : {}),
+            ...(this.params.src === "wav" ? {
+                "f": "wav"
+            } : {}),
+            ...(this.params.src === "mp3" ? {
+                "f": "mp3"
+            } : {}),
+            ...(this.params.src === "opus" ? {
+                "f": "opus"
+            } : {})
+        });
+        const streamOutput = this.ffmpeg.createOutputStream({
+            /* FFmpeg output options */
+            "flush_packets": 1,
+            ...(this.params.dst === "pcm" ? {
+                "c:a": "pcm_s16le",
+                "ar": this.config.audioSampleRate,
+                "ac": this.config.audioChannels,
+                "f": "s16le",
+            } : {}),
+            ...(this.params.dst === "wav" ? {
+                "f": "wav"
+            } : {}),
+            ...(this.params.dst === "mp3" ? {
+                "c:a": "libmp3lame",
+                "b:a": "192k",
+                "f": "mp3"
+            } : {}),
+            ...(this.params.dst === "opus" ? {
+                "acodec": "libopus",
+                "f": "opus"
+            } : {})
+        });
+        this.ffmpeg.run();
+        /* establish a duplex stream and connect it to FFmpeg */
+        this.stream = node_stream_1.default.Duplex.from({
+            readable: streamOutput,
+            writable: streamInput
+        });
+    }
+    /* close node */
+    async close() {
+        /* close duplex stream */
+        if (this.stream !== null) {
+            await new Promise((resolve) => {
+                if (this.stream instanceof node_stream_1.default.Duplex)
+                    this.stream.end(() => { resolve(); });
+                else
+                    resolve();
+            });
+            this.stream.destroy();
+            this.stream = null;
+        }
+        /* shutdown FFmpeg */
+        if (this.ffmpeg !== null) {
+            this.ffmpeg.kill();
+            this.ffmpeg = null;
+        }
+    }
+}
+exports.default = SpeechFlowNodeFFmpeg;
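The node above spawns FFmpeg through `ffmpeg-stream` and then bridges the converter's input and output pipes into a single Node.js duplex with `Stream.Duplex.from({ readable, writable })`, so whatever audio is written into the node comes back out converted. A reduced standalone sketch of that bridging pattern for raw PCM-to-WAV conversion (sample rate and channel count are example values; the real node takes them from its configuration):

```ts
import Stream from "node:stream"
import { Converter } from "ffmpeg-stream"

/* sketch: wrap an FFmpeg conversion (raw PCM s16le -> WAV) into one duplex stream */
function makePcmToWavStream (sampleRate = 48000, channels = 1): Stream.Duplex {
    const converter = new Converter()
    const input  = converter.createInputStream({ "f": "s16le", "ar": sampleRate, "ac": channels })
    const output = converter.createOutputStream({ "f": "wav" })
    converter.run()  /* starts the FFmpeg child process */
    return Stream.Duplex.from({ readable: output, writable: input })
}
```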
package/dst/speechflow-node-file.d.ts
ADDED
@@ -0,0 +1,9 @@
+import SpeechFlowNode from "./speechflow-node";
+export default class SpeechFlowNodeFile extends SpeechFlowNode {
+    static name: string;
+    constructor(id: string, opts: {
+        [id: string]: any;
+    }, args: any[]);
+    open(): Promise<void>;
+    close(): Promise<void>;
+}