speechflow 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.md +30 -0
  2. package/dst/speechflow-node-deepgram.d.ts +10 -0
  3. package/dst/speechflow-node-deepgram.js +44 -23
  4. package/dst/speechflow-node-deepl.d.ts +10 -0
  5. package/dst/speechflow-node-deepl.js +30 -12
  6. package/dst/speechflow-node-device.d.ts +11 -0
  7. package/dst/speechflow-node-device.js +73 -14
  8. package/dst/speechflow-node-elevenlabs.d.ts +10 -0
  9. package/dst/speechflow-node-elevenlabs.js +14 -2
  10. package/dst/speechflow-node-ffmpeg.d.ts +11 -0
  11. package/dst/speechflow-node-ffmpeg.js +114 -0
  12. package/dst/speechflow-node-file.d.ts +9 -0
  13. package/dst/speechflow-node-file.js +71 -13
  14. package/dst/speechflow-node-gemma.d.ts +11 -0
  15. package/dst/speechflow-node-gemma.js +152 -0
  16. package/dst/speechflow-node-websocket.d.ts +11 -0
  17. package/dst/speechflow-node-websocket.js +34 -6
  18. package/dst/speechflow-node.d.ts +38 -0
  19. package/dst/speechflow-node.js +28 -10
  20. package/dst/speechflow.d.ts +1 -0
  21. package/dst/speechflow.js +128 -43
  22. package/etc/tsconfig.json +2 -0
  23. package/package.json +24 -10
  24. package/src/speechflow-node-deepgram.ts +55 -24
  25. package/src/speechflow-node-deepl.ts +38 -16
  26. package/src/speechflow-node-device.ts +88 -14
  27. package/src/speechflow-node-elevenlabs.ts +19 -2
  28. package/src/speechflow-node-ffmpeg.ts +122 -0
  29. package/src/speechflow-node-file.ts +76 -14
  30. package/src/speechflow-node-gemma.ts +169 -0
  31. package/src/speechflow-node-websocket.ts +52 -13
  32. package/src/speechflow-node.ts +43 -21
  33. package/src/speechflow.ts +142 -46
  34. package/dst/speechflow-util.js +0 -37
  35. package/src/speechflow-util.ts +0 -36
package/README.md CHANGED
@@ -141,6 +141,20 @@ Currently **SpeechFlow** provides the following processing nodes:
  | **device** | 0 | *none* | `/^(.+?):(.+)$/` |
  | **mode** | 1 | "rw" | `/^(?:r\|w\|rw)$/` |

+ - Node: **ffmpeg**<br/>
+ Purpose: **FFmpeg audio format conversion**<br/>
+ Example: `ffmpeg(src: "pcm", dst: "mp3")`
+
+ | Port | Payload |
+ | ------- | ----------- |
+ | input | audio |
+ | output | audio |
+
+ | Parameter | Position | Default | Requirement |
+ | ----------- | --------- | -------- | ------------------ |
+ | **src** | 0 | "pcm" | `/^(?:pcm\|wav\|mp3\|opus)$/` |
+ | **dst** | 1 | "wav" | `/^(?:pcm\|wav\|mp3\|opus)$/` |
+
  - Node: **deepgram**<br/>
  Purpose: **Deepgram Speech-to-Text conversion**<br/>
  Example: `deepgram(language: "de")`<br/>
@@ -174,6 +188,22 @@ Currently **SpeechFlow** provides the following processing nodes:
  | **src** | 0 | "de" | `/^(?:de\|en-US)$/` |
  | **dst** | 1 | "en-US" | `/^(?:de\|en-US)$/` |

+ - Node: **gemma**<br/>
+ Purpose: **Google Gemma Text-to-Text translation**<br/>
+ Example: `gemma(src: "de", dst: "en")`<br/>
+ Notice: this node requires the Ollama API!
+
+ | Port | Payload |
+ | ------- | ----------- |
+ | input | text |
+ | output | text |
+
+ | Parameter | Position | Default | Requirement |
+ | ------------ | --------- | -------- | ------------------ |
+ | **url** | *none* | "http://127.0.0.1:11434" | `/^https?:\/\/.+?:\d+$/` |
+ | **src** | 0 | "de" | `/^(?:de\|en)$/` |
+ | **dst** | 1 | "en" | `/^(?:de\|en)$/` |
+
  - Node: **elevenlabs**<br/>
  Purpose: **ElevenLabs Text-to-Speech conversion**<br/>
  Example: `elevenlabs(language: "en")`<br/>
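> Editor's note on the new **gemma** node documented above: it delegates translation to a local Ollama server (default URL `http://127.0.0.1:11434`). Its implementation (`speechflow-node-gemma.js`, +152 lines) is not reproduced in the hunks below, so the following is only a hedged sketch of what a minimal Ollama-based translation call could look like. The `/api/generate` endpoint and the `{ model, prompt, stream }` request shape are Ollama's public API; the model tag `gemma2` and the prompt wording are assumptions for illustration.

```ts
/* hypothetical sketch, not package code: translate text via a local Ollama server */
async function translateViaOllama (text: string, src = "de", dst = "en"): Promise<string> {
    const response = await fetch("http://127.0.0.1:11434/api/generate", {
        method:  "POST",
        headers: { "Content-Type": "application/json" },
        body:    JSON.stringify({
            model:  "gemma2",   /* assumed model tag -- any locally pulled Gemma model */
            prompt: `Translate the following text from ${src} to ${dst}. ` +
                    `Reply with the translation only:\n${text}`,
            stream: false       /* request a single JSON response instead of a stream */
        })
    })
    const result = await response.json() as { response: string }
    return result.response.trim()
}
```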
package/dst/speechflow-node-deepgram.d.ts ADDED
@@ -0,0 +1,10 @@
+ import SpeechFlowNode from "./speechflow-node";
+ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
+ static name: string;
+ private dg;
+ constructor(id: string, opts: {
+ [id: string]: any;
+ }, args: any[]);
+ open(): Promise<void>;
+ close(): Promise<void>;
+ }
package/dst/speechflow-node-deepgram.js CHANGED
@@ -41,30 +41,41 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
+ /* standard dependencies */
  const node_events_1 = require("node:events");
+ /* external dependencies */
  const node_stream_1 = __importDefault(require("node:stream"));
  const Deepgram = __importStar(require("@deepgram/sdk"));
+ /* internal dependencies */
  const speechflow_node_1 = __importDefault(require("./speechflow-node"));
- class SpeechFlowNodeDevice extends speechflow_node_1.default {
+ /* SpeechFlow node for Deepgram speech-to-text conversion */
+ class SpeechFlowNodeDeepgram extends speechflow_node_1.default {
+ /* declare official node name */
+ static name = "deepgram";
+ /* internal state */
  dg = null;
+ /* construct node */
  constructor(id, opts, args) {
  super(id, opts, args);
+ /* declare node configuration parameters */
  this.configure({
  key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
- model: { type: "string", val: "nova-2", pos: 0 }, /* FIXME: nova-3 multiligual */
+ model: { type: "string", val: "nova-3", pos: 0 },
  version: { type: "string", val: "latest", pos: 1 },
- language: { type: "string", val: "de", pos: 2 }
+ language: { type: "string", val: "multi", pos: 2 }
  });
- }
- async open() {
+ /* declare node input/output format */
  this.input = "audio";
  this.output = "text";
- this.stream = null;
+ }
+ /* open node */
+ async open() {
  /* sanity check situation */
  if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
  throw new Error("Deepgram node currently supports PCM-S16LE audio only");
- /* connect to Deepgram API */
+ /* create queue for results */
  const queue = new node_events_1.EventEmitter();
+ /* connect to Deepgram API */
  const deepgram = Deepgram.createClient(this.params.key);
  this.dg = deepgram.listen.live({
  model: this.params.model,
@@ -74,7 +85,7 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
  sample_rate: this.config.audioSampleRate,
  encoding: "linear16",
  multichannel: false,
- // endpointing: false, /* FIXME: ? */
+ endpointing: 10,
  interim_results: false,
  smart_format: true,
  punctuate: true,
@@ -83,26 +94,31 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
  numerals: true,
  paragraphs: true,
  profanity_filter: true,
- utterances: false,
- });
- await new Promise((resolve) => {
- this.dg.on(Deepgram.LiveTranscriptionEvents.Open, () => {
- this.log("info", "Deepgram: connection open");
- resolve(true);
- });
- });
- /* hooks onto Deepgram API events */
- this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
- this.log("info", "Deepgram: connection close");
+ utterances: false
  });
+ /* hook onto Deepgram API events */
  this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
  const text = data.channel?.alternatives[0].transcript ?? "";
  if (text === "")
  return;
  queue.emit("text", text);
  });
+ this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
+ this.log("info", "Deepgram: metadata received");
+ });
+ this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
+ this.log("info", "Deepgram: connection close");
+ });
  this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error) => {
- this.log("error", `Deepgram: ${error}`);
+ this.log("error", `Deepgram: ${error.message}`);
+ this.emit("error");
+ });
+ /* wait for Deepgram API to be available */
+ await new Promise((resolve) => {
+ this.dg.once(Deepgram.LiveTranscriptionEvents.Open, () => {
+ this.log("info", "Deepgram: connection open");
+ resolve(true);
+ });
  });
  /* provide Duplex stream and internally attach to Deepgram API */
  const dg = this.dg;
@@ -117,19 +133,24 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
  },
  read(size) {
  queue.once("text", (text) => {
- if (text !== "")
- this.push(text);
+ this.push(text);
  });
+ },
+ final(callback) {
+ dg.requestClose();
  }
  });
  }
+ /* close node */
  async close() {
+ /* close stream */
  if (this.stream !== null) {
  this.stream.destroy();
  this.stream = null;
  }
+ /* shutdown Deepgram API */
  if (this.dg !== null)
  this.dg.requestClose();
  }
  }
- exports.default = SpeechFlowNodeDevice;
+ exports.default = SpeechFlowNodeDeepgram;
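> Editor's note: the reworked node above wraps the event-driven Deepgram client in a Node.js `Duplex` stream: `write()` forwards audio to the API, transcript events are buffered on an `EventEmitter` queue, `read()` delivers one queued transcript at a time, and `final()` asks the client to close. Below is a minimal, package-independent sketch of that pattern; the names `send` and `queue` are illustrative, not part of the package.

```ts
/* sketch of the Duplex-around-asynchronous-API pattern used above (not package code) */
import Stream           from "node:stream"
import { EventEmitter } from "node:events"

function duplexAroundAsyncAPI (send: (chunk: Buffer) => void, queue: EventEmitter) {
    return new Stream.Duplex({
        write (chunk: Buffer, _encoding, callback) {
            send(chunk)     /* forward incoming data to the external API */
            callback()
        },
        read (_size) {
            /* deliver the next asynchronous result once it arrives */
            queue.once("text", (text: string) => { this.push(text) })
        },
        final (callback) {
            callback()      /* a real node would also shut down the API here */
        }
    })
}
```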
package/dst/speechflow-node-deepl.d.ts ADDED
@@ -0,0 +1,10 @@
+ import SpeechFlowNode from "./speechflow-node";
+ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
+ static name: string;
+ private deepl;
+ constructor(id: string, opts: {
+ [id: string]: any;
+ }, args: any[]);
+ open(): Promise<void>;
+ close(): Promise<void>;
+ }
package/dst/speechflow-node-deepl.js CHANGED
@@ -41,34 +41,49 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
+ /* standard dependencies */
  const node_stream_1 = __importDefault(require("node:stream"));
  const node_events_1 = require("node:events");
- const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+ /* external dependencies */
  const DeepL = __importStar(require("deepl-node"));
+ /* internal dependencies */
+ const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+ /* SpeechFlow node for DeepL text-to-text translations */
  class SpeechFlowNodeDeepL extends speechflow_node_1.default {
- translator = null;
+ /* declare official node name */
+ static name = "deepl";
+ /* internal state */
+ deepl = null;
+ /* construct node */
  constructor(id, opts, args) {
  super(id, opts, args);
- this.input = "text";
- this.output = "text";
- this.stream = null;
+ /* declare node configuration parameters */
  this.configure({
  key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPL },
  src: { type: "string", pos: 0, val: "de", match: /^(?:de|en-US)$/ },
- dst: { type: "string", pos: 1, val: "en-US", match: /^(?:de|en-US)$/ }
+ dst: { type: "string", pos: 1, val: "en-US", match: /^(?:de|en-US)$/ },
+ optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
  });
+ /* declare node input/output format */
+ this.input = "text";
+ this.output = "text";
  }
+ /* open node */
  async open() {
  /* instantiate DeepL API SDK */
- this.translator = new DeepL.Translator(this.params.key);
+ this.deepl = new DeepL.Translator(this.params.key);
  /* provide text-to-text translation */
  const translate = async (text) => {
- const result = await this.translator.translateText(text, this.params.src, this.params.dst, {
- splitSentences: "off"
+ const result = await this.deepl.translateText(text, this.params.src, this.params.dst, {
+ splitSentences: "off",
+ modelType: this.params.optimize === "latency" ?
+ "latency_optimized" : "prefer_quality_optimized",
+ preserveFormatting: true,
+ formality: "prefer_more"
  });
  return (result?.text ?? text);
  };
- /* establish a duplex stream and connect it to the translation */
+ /* establish a duplex stream and connect it to DeepL translation */
  const queue = new node_events_1.EventEmitter();
  this.stream = new node_stream_1.default.Duplex({
  write(chunk, encoding, callback) {
@@ -93,13 +108,16 @@ class SpeechFlowNodeDeepL extends speechflow_node_1.default {
  }
  });
  }
+ /* open node */
  async close() {
+ /* close stream */
  if (this.stream !== null) {
  this.stream.destroy();
  this.stream = null;
  }
- if (this.translator !== null)
- this.translator = null;
+ /* shutdown DeepL API */
+ if (this.deepl !== null)
+ this.deepl = null;
  }
  }
  exports.default = SpeechFlowNodeDeepL;
package/dst/speechflow-node-device.d.ts ADDED
@@ -0,0 +1,11 @@
+ import SpeechFlowNode from "./speechflow-node";
+ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
+ static name: string;
+ private io;
+ constructor(id: string, opts: {
+ [id: string]: any;
+ }, args: any[]);
+ private audioDeviceFromURL;
+ open(): Promise<void>;
+ close(): Promise<void>;
+ }
package/dst/speechflow-node-device.js CHANGED
@@ -8,34 +8,85 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
+ /* external dependencies */
  const naudiodon_1 = __importDefault(require("@gpeng/naudiodon"));
+ /* internal dependencies */
  const speechflow_node_1 = __importDefault(require("./speechflow-node"));
- const speechflow_util_1 = __importDefault(require("./speechflow-util"));
+ /* SpeechFlow node for device access */
  class SpeechFlowNodeDevice extends speechflow_node_1.default {
+ /* declare official node name */
+ static name = "device";
+ /* internal state */
  io = null;
+ /* construct node */
  constructor(id, opts, args) {
  super(id, opts, args);
+ /* declare node configuration parameters */
  this.configure({
  device: { type: "string", pos: 0, match: /^(.+?):(.+)$/ },
  mode: { type: "string", pos: 1, val: "rw", match: /^(?:r|w|rw)$/ }
  });
+ /* declare node input/output format */
+ if (this.params.mode === "rw") {
+ this.input = "audio";
+ this.output = "audio";
+ }
+ else if (this.params.mode === "r") {
+ this.input = "none";
+ this.output = "audio";
+ }
+ else if (this.params.mode === "w") {
+ this.input = "audio";
+ this.output = "none";
+ }
  }
+ /* INTERNAL: utility function for finding audio device by pseudo-URL notation */
+ audioDeviceFromURL(mode, url) {
+ /* parse URL */
+ const m = url.match(/^(.+?):(.+)$/);
+ if (m === null)
+ throw new Error(`invalid audio device URL "${url}"`);
+ const [, type, name] = m;
+ /* determine audio API */
+ const apis = naudiodon_1.default.getHostAPIs();
+ const api = apis.HostAPIs.find((api) => api.type.toLowerCase() === type.toLowerCase());
+ if (!api)
+ throw new Error(`invalid audio API type "${type}"`);
+ /* determine device of audio API */
+ const devices = naudiodon_1.default.getDevices();
+ const device = devices.find((device) => {
+ return (((mode === "r" && device.maxInputChannels > 0)
+ || (mode === "w" && device.maxOutputChannels > 0)
+ || (mode === "rw" && device.maxInputChannels > 0 && device.maxOutputChannels > 0)
+ || (mode === "any" && (device.maxInputChannels > 0 || device.maxOutputChannels > 0)))
+ && device.name.match(name)
+ && device.hostAPIName === api.name);
+ });
+ if (!device)
+ throw new Error(`invalid audio device "${name}" (of audio API type "${type}")`);
+ return device;
+ }
+ /* open node */
  async open() {
  /* determine device */
- const device = speechflow_util_1.default.audioDeviceFromURL(this.params.mode, this.params.device);
+ const device = this.audioDeviceFromURL(this.params.mode, this.params.device);
  /* sanity check sample rate compatibility
  (we still do not resample in input/output for simplification reasons) */
  if (device.defaultSampleRate !== this.config.audioSampleRate)
- throw new Error(`device audio sample rate ${device.defaultSampleRate} is ` +
+ throw new Error(`audio device sample rate ${device.defaultSampleRate} is ` +
  `incompatible with required sample rate ${this.config.audioSampleRate}`);
  /* establish device connection
  Notice: "naudion" actually implements Stream.{Readable,Writable,Duplex}, but
  declares just its sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
  so it is correct to cast it back to Stream.{Readable,Writable,Duplex} */
- if (device.maxInputChannels > 0 && device.maxOutputChannels > 0) {
+ /* FIXME: the underlying PortAudio outputs verbose/debugging messages */
+ if (this.params.mode === "rw") {
+ /* input/output device */
+ if (device.maxInputChannels === 0)
+ throw new Error(`device "${device.id}" does not have any input channels (required by read/write mode)`);
+ if (device.maxOutputChannels === 0)
+ throw new Error(`device "${device.id}" does not have any output channels (required by read/write mode)`);
  this.log("info", `resolved "${this.params.device}" to duplex device "${device.id}"`);
- this.input = "audio";
- this.output = "audio";
  this.io = naudiodon_1.default.AudioIO({
  inOptions: {
  deviceId: device.id,
@@ -52,10 +103,11 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
  });
  this.stream = this.io;
  }
- else if (device.maxInputChannels > 0 && device.maxOutputChannels === 0) {
+ else if (this.params.mode === "r") {
+ /* input device */
+ if (device.maxInputChannels === 0)
+ throw new Error(`device "${device.id}" does not have any input channels (required by read mode)`);
  this.log("info", `resolved "${this.params.device}" to input device "${device.id}"`);
- this.input = "none";
- this.output = "audio";
  this.io = naudiodon_1.default.AudioIO({
  inOptions: {
  deviceId: device.id,
@@ -66,10 +118,11 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
  });
  this.stream = this.io;
  }
- else if (device.maxInputChannels === 0 && device.maxOutputChannels > 0) {
+ else if (this.params.mode === "w") {
+ /* output device */
+ if (device.maxOutputChannels === 0)
+ throw new Error(`device "${device.id}" does not have any output channels (required by write mode)`);
  this.log("info", `resolved "${this.params.device}" to output device "${device.id}"`);
- this.input = "audio";
- this.output = "none";
  this.io = naudiodon_1.default.AudioIO({
  outOptions: {
  deviceId: device.id,
@@ -82,14 +135,20 @@ class SpeechFlowNodeDevice extends speechflow_node_1.default {
  }
  else
  throw new Error(`device "${device.id}" does not have any input or output channels`);
- /* pass-through errors */
+ /* pass-through PortAudio errors */
  this.io.on("error", (err) => {
  this.emit("error", err);
  });
+ /* start PortAudio */
+ this.io.start();
  }
+ /* close node */
  async close() {
- if (this.io !== null)
+ /* shutdown PortAudio */
+ if (this.io !== null) {
  this.io.quit();
+ this.io = null;
+ }
  }
  }
  exports.default = SpeechFlowNodeDevice;
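> Editor's note: the `device` parameter of the node above uses the `"<api>:<device>"` pseudo-URL notation resolved by the new `audioDeviceFromURL()` method. A small, hypothetical enumeration script (not part of the package) can list the values usable there, relying only on the same `@gpeng/naudiodon` calls shown in the diff; the output format is illustrative.

```ts
/* sketch: enumerate PortAudio host APIs and devices to find "<api>:<device>" values */
import portAudio from "@gpeng/naudiodon"

const apis = portAudio.getHostAPIs()
for (const device of portAudio.getDevices()) {
    /* match each device to its host API to recover the "<api>" prefix */
    const api = apis.HostAPIs.find((a) => a.name === device.hostAPIName)
    console.log(`${api?.type ?? "?"}:${device.name} ` +
        `(in: ${device.maxInputChannels}, out: ${device.maxOutputChannels}, ` +
        `rate: ${device.defaultSampleRate})`)
}
```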
package/dst/speechflow-node-elevenlabs.d.ts ADDED
@@ -0,0 +1,10 @@
+ import SpeechFlowNode from "./speechflow-node";
+ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
+ static name: string;
+ private elevenlabs;
+ constructor(id: string, opts: {
+ [id: string]: any;
+ }, args: any[]);
+ open(): Promise<void>;
+ close(): Promise<void>;
+ }
package/dst/speechflow-node-elevenlabs.js CHANGED
@@ -41,10 +41,13 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
+ /* standard dependencies */
  const node_stream_1 = __importDefault(require("node:stream"));
  const node_events_1 = require("node:events");
+ /* external dependencies */
  const ElevenLabs = __importStar(require("elevenlabs"));
  const get_stream_1 = require("get-stream");
+ /* internal dependencies */
  const speechflow_node_1 = __importDefault(require("./speechflow-node"));
  /*
  const elevenlabsVoices = {
@@ -61,18 +64,25 @@ const elevenlabsVoices = {
  }
  */
  class SpeechFlowNodeElevenlabs extends speechflow_node_1.default {
+ /* declare official node name */
+ static name = "elevenlabs";
+ /* internal state */
  elevenlabs = null;
+ /* construct node */
  constructor(id, opts, args) {
  super(id, opts, args);
+ /* declare node configuration parameters */
  this.configure({
  key: { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
  voice: { type: "string", val: "Brian", pos: 0 },
  language: { type: "string", val: "de", pos: 1 }
  });
- }
- async open() {
+ /* declare node input/output format */
  this.input = "text";
  this.output = "audio";
+ }
+ /* open node */
+ async open() {
  this.elevenlabs = new ElevenLabs.ElevenLabsClient({
  apiKey: this.params.key
  });
@@ -121,7 +131,9 @@ class SpeechFlowNodeElevenlabs extends speechflow_node_1.default {
  }
  });
  }
+ /* close node */
  async close() {
+ /* destroy stream */
  if (this.stream !== null) {
  this.stream.destroy();
  this.stream = null;
package/dst/speechflow-node-ffmpeg.d.ts ADDED
@@ -0,0 +1,11 @@
+ import SpeechFlowNode from "./speechflow-node";
+ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
+ static name: string;
+ private ffmpegBinary;
+ private ffmpeg;
+ constructor(id: string, opts: {
+ [id: string]: any;
+ }, args: any[]);
+ open(): Promise<void>;
+ close(): Promise<void>;
+ }
package/dst/speechflow-node-ffmpeg.js ADDED
@@ -0,0 +1,114 @@
+ "use strict";
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ /* standard dependencies */
+ const node_stream_1 = __importDefault(require("node:stream"));
+ /* external dependencies */
+ const ffmpeg_1 = __importDefault(require("@rse/ffmpeg"));
+ const ffmpeg_stream_1 = require("ffmpeg-stream");
+ /* internal dependencies */
+ const speechflow_node_1 = __importDefault(require("./speechflow-node"));
+ /* SpeechFlow node for FFmpeg */
+ class SpeechFlowNodeFFmpeg extends speechflow_node_1.default {
+ /* declare official node name */
+ static name = "ffmpeg";
+ /* internal state */
+ ffmpegBinary = ffmpeg_1.default.supported ? ffmpeg_1.default.binary : "ffmpeg";
+ ffmpeg = null;
+ /* construct node */
+ constructor(id, opts, args) {
+ super(id, opts, args);
+ /* declare node configuration parameters */
+ this.configure({
+ src: { type: "string", pos: 0, val: "pcm", match: /^(?:pcm|wav|mp3|opus)$/ },
+ dst: { type: "string", pos: 1, val: "wav", match: /^(?:pcm|wav|mp3|opus)$/ }
+ });
+ /* declare node input/output format */
+ this.input = "audio";
+ this.output = "audio";
+ }
+ /* open node */
+ async open() {
+ /* sanity check situation */
+ if (this.params.src === this.params.dst)
+ throw new Error("source and destination formats should not be the same");
+ /* instantiate FFmpeg sub-process */
+ this.ffmpeg = new ffmpeg_stream_1.Converter(this.ffmpegBinary);
+ const streamInput = this.ffmpeg.createInputStream({
+ /* FFmpeg input options */
+ "fflags": "nobuffer",
+ "flags": "low_delay",
+ "probesize": 32,
+ "analyzeduration": 0,
+ ...(this.params.src === "pcm" ? {
+ "f": "s16le",
+ "ar": this.config.audioSampleRate,
+ "ac": this.config.audioChannels
+ } : {}),
+ ...(this.params.src === "wav" ? {
+ "f": "wav"
+ } : {}),
+ ...(this.params.src === "mp3" ? {
+ "f": "mp3"
+ } : {}),
+ ...(this.params.src === "opus" ? {
+ "f": "opus"
+ } : {})
+ });
+ const streamOutput = this.ffmpeg.createOutputStream({
+ /* FFmpeg output options */
+ "flush_packets": 1,
+ ...(this.params.dst === "pcm" ? {
+ "c:a": "pcm_s16le",
+ "ar": this.config.audioSampleRate,
+ "ac": this.config.audioChannels,
+ "f": "s16le",
+ } : {}),
+ ...(this.params.dst === "wav" ? {
+ "f": "wav"
+ } : {}),
+ ...(this.params.dst === "mp3" ? {
+ "c:a": "libmp3lame",
+ "b:a": "192k",
+ "f": "mp3"
+ } : {}),
+ ...(this.params.dst === "opus" ? {
+ "acodec": "libopus",
+ "f": "opus"
+ } : {})
+ });
+ this.ffmpeg.run();
+ /* establish a duplex stream and connect it to FFmpeg */
+ this.stream = node_stream_1.default.Duplex.from({
+ readable: streamOutput,
+ writable: streamInput
+ });
+ }
+ /* close node */
+ async close() {
+ /* close duplex stream */
+ if (this.stream !== null) {
+ await new Promise((resolve) => {
+ if (this.stream instanceof node_stream_1.default.Duplex)
+ this.stream.end(() => { resolve(); });
+ else
+ resolve();
+ });
+ this.stream.destroy();
+ this.stream = null;
+ }
+ /* shutdown FFmpeg */
+ if (this.ffmpeg !== null) {
+ this.ffmpeg.kill();
+ this.ffmpeg = null;
+ }
+ }
+ }
+ exports.default = SpeechFlowNodeFFmpeg;
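> Editor's note: the node above drives FFmpeg through the `ffmpeg-stream` `Converter`, wiring its input and output streams into a single `Duplex`. For reference, here is a hedged standalone sketch of the same Converter pattern outside SpeechFlow, converting a raw PCM-S16LE file to MP3; the file names, the 48 kHz mono input format, and relying on an `ffmpeg` binary found on `PATH` are assumptions.

```ts
/* standalone sketch (not package code): raw PCM -> MP3 via ffmpeg-stream */
import fs            from "node:fs"
import { Converter } from "ffmpeg-stream"

async function convertPcmToMp3 (): Promise<void> {
    const converter = new Converter()  /* assumes "ffmpeg" is available on PATH */
    const input  = converter.createInputStream({  "f": "s16le", "ar": 48000, "ac": 1 })
    const output = converter.createOutputStream({ "c:a": "libmp3lame", "b:a": "192k", "f": "mp3" })
    fs.createReadStream("input.pcm").pipe(input)       /* feed raw PCM samples */
    output.pipe(fs.createWriteStream("output.mp3"))    /* collect encoded MP3 */
    await converter.run()                              /* wait for FFmpeg to finish */
}
```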
package/dst/speechflow-node-file.d.ts ADDED
@@ -0,0 +1,9 @@
+ import SpeechFlowNode from "./speechflow-node";
+ export default class SpeechFlowNodeFile extends SpeechFlowNode {
+ static name: string;
+ constructor(id: string, opts: {
+ [id: string]: any;
+ }, args: any[]);
+ open(): Promise<void>;
+ close(): Promise<void>;
+ }