speechflow 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +239 -0
- package/dst/speechflow-node-deepgram.js +135 -0
- package/dst/speechflow-node-deepl.js +105 -0
- package/dst/speechflow-node-device.js +95 -0
- package/dst/speechflow-node-elevenlabs.js +131 -0
- package/dst/speechflow-node-file.js +47 -0
- package/dst/speechflow-node-websocket.js +147 -0
- package/dst/speechflow-node.js +77 -0
- package/dst/speechflow-util.js +37 -0
- package/dst/speechflow.js +223 -0
- package/etc/biome.jsonc +37 -0
- package/etc/eslint.mjs +95 -0
- package/etc/nps.yaml +40 -0
- package/etc/oxlint.jsonc +20 -0
- package/etc/tsconfig.json +23 -0
- package/package.json +76 -0
- package/sample.yaml +32 -0
- package/src/lib.d.ts +20 -0
- package/src/speechflow-logo.ai +1492 -4
- package/src/speechflow-logo.svg +46 -0
- package/src/speechflow-node-deepgram.ts +102 -0
- package/src/speechflow-node-deepl.ts +76 -0
- package/src/speechflow-node-device.ts +96 -0
- package/src/speechflow-node-elevenlabs.ts +99 -0
- package/src/speechflow-node-file.ts +46 -0
- package/src/speechflow-node-websocket.ts +140 -0
- package/src/speechflow-node.ts +76 -0
- package/src/speechflow-util.ts +36 -0
- package/src/speechflow.ts +242 -0
- package/tsconfig.json +3 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<svg id="Layer_1" xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="0 0 580 286">
|
|
3
|
+
<!-- Generator: Adobe Illustrator 29.4.0, SVG Export Plug-In . SVG Version: 2.1.0 Build 152) -->
|
|
4
|
+
<defs>
|
|
5
|
+
<style>
|
|
6
|
+
.st0 {
|
|
7
|
+
fill: #fff;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
.st0, .st1 {
|
|
11
|
+
fill-rule: evenodd;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
.st1 {
|
|
15
|
+
fill: #b06820;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
.st2 {
|
|
19
|
+
fill: #369;
|
|
20
|
+
}
|
|
21
|
+
</style>
|
|
22
|
+
</defs>
|
|
23
|
+
<g>
|
|
24
|
+
<path d="M23.1,90.8c4.1,2.8,9.8,4.8,15.9,4.8,10.8,0,17.5-6.1,17.5-15.1s-4.2-12.9-14.7-17c-11.7-4.1-18.7-10.1-18.7-20s8.8-18.4,21.1-18.4,12,1.8,14.4,3.4l-1.9,4.3c-1.9-1.4-6.6-3.4-12.8-3.4-11.7,0-15.7,7.5-15.7,13.3,0,8.1,4.6,12.2,15.1,16.3,12,4.8,18.3,10.2,18.3,21.1s-7.5,19.9-23.1,19.9-13.6-2-17.3-4.8l1.9-4.3Z"/>
|
|
25
|
+
<path d="M70,63.7c0-6.5-.2-11.6-.4-16.5h4.8l.4,9.8h.2c3.7-6.9,10-11,19.1-11,13.2,0,22.2,11,22.2,26.2s-11,27.9-23.5,27.9-14-3.3-17.6-9.6h-.2v29.7h-5.1v-56.6ZM75,78.5c0,1.4.2,2.8.4,4.1,2.2,8,9,13.1,17,13.1,11.9,0,18.6-9.6,18.6-23.4s-6.5-22.2-18.2-22.2-14.9,5.3-17.1,13.8c-.3,1.4-.8,3-.8,4.4v10.1Z"/>
|
|
26
|
+
<path d="M124.8,72.5c0,16.6,9,23.1,19.7,23.1s11.4-1.5,14.3-2.9l1.3,4.1c-1.9,1.1-7.2,3.2-16.3,3.2-14.9,0-24.1-10.8-24.1-26s9.9-28,23.3-28,20.1,15.9,20.1,23.4,0,2.3-.2,3.1h-38.1ZM157.8,68.4c.1-7.2-2.9-18.2-15.4-18.2s-16.4,10.2-17.2,18.2h32.6Z"/>
|
|
27
|
+
<path d="M171.7,72.5c0,16.6,9,23.1,19.7,23.1s11.4-1.5,14.3-2.9l1.3,4.1c-1.9,1.1-7.2,3.2-16.3,3.2-14.9,0-24.1-10.8-24.1-26s9.9-28,23.3-28,20.1,15.9,20.1,23.4,0,2.3-.2,3.1h-38.1ZM204.6,68.4c.1-7.2-2.9-18.2-15.4-18.2s-16.4,10.2-17.2,18.2h32.6Z"/>
|
|
28
|
+
<path d="M253.4,96.6c-2.3,1.2-7.6,3.3-15.1,3.3-15,0-24.9-10.8-24.9-26.4s11.2-27.6,26.7-27.6,11.5,1.8,13.6,3.2l-1.8,4.2c-2.6-1.4-6.6-3-12.3-3-13.9,0-21,10.7-21,22.8s8.5,22.3,20.5,22.3,10.1-1.7,12.9-2.9l1.4,4Z"/>
|
|
29
|
+
<path d="M260.4,22.4h5.1v34h.2c1.5-2.9,4-5.7,7.1-7.5,2.9-1.8,6.5-2.9,10.4-2.9s17.9,2.5,17.9,21.7v31.2h-5.1v-30.6c0-9.4-3.7-17.9-14.2-17.9s-13.5,5.3-15.6,11.7c-.5,1.5-.8,3-.8,5.1v31.7h-5.1V22.4Z"/>
|
|
30
|
+
</g>
|
|
31
|
+
<g>
|
|
32
|
+
<path class="st2" d="M102.1,122.7h71.1v21.6h-44.7v26.6h41.8v21.4h-41.8v46.8h-26.4v-116.4Z"/>
|
|
33
|
+
<path class="st2" d="M184.5,122.7h26.4v94.3h46.3v22.1h-72.7v-116.4Z"/>
|
|
34
|
+
</g>
|
|
35
|
+
<path class="st1" d="M329.8,110.5c-43.9,0-79.7,31.5-79.7,70.3s6.5,32.2,18.3,44.8c2.3,9.6-.5,19.8-7.5,26.9-2.9,2.9-.9,8,3.3,8,13.4,0,26.2-5.2,35.8-14.5,9.5,3.4,19.5,5.1,29.8,5.1,43.9,0,80.3-31.5,80.3-70.3s-36.4-70.3-80.3-70.3Z"/>
|
|
36
|
+
<g id="Icon">
|
|
37
|
+
<path class="st0" d="M318.3,174.4v9c0,2.7,1.1,5.2,3,7.1s4.5,3,7.1,3h9c2.7,0,5.2-1.1,7.1-3s3-4.5,3-7.1v-9c0-2.7-1.1-5.2-3-7.1-1.9-1.9-4.5-3-7.1-3h-9c-2.7,0-5.2,1.1-7.1,3s-3,4.5-3,7.1Z"/>
|
|
38
|
+
<path class="st0" d="M284.7,140.8v9c0,2.7,1.1,5.2,3,7.1s4.5,3,7.1,3h9c2.7,0,5.2-1.1,7.1-3s3-4.5,3-7.1v-9c0-2.7-1.1-5.2-3-7.1s-4.5-3-7.1-3h-9c-2.7,0-5.2,1.1-7.1,3s-3,4.5-3,7.1Z"/>
|
|
39
|
+
<path class="st0" d="M351.9,208v9c0,2.7,1.1,5.2,3,7.1,1.9,1.9,4.5,3,7.1,3h9c2.7,0,5.2-1.1,7.1-3,1.9-1.9,3-4.5,3-7.1v-9c0-2.7-1.1-5.2-3-7.1s-4.5-3-7.1-3h-9c-2.7,0-5.2,1.1-7.1,3s-3,4.5-3,7.1Z"/>
|
|
40
|
+
<path class="st0" d="M310.4,148.7h58.3c1.5,0,2.9.6,4,1.6,1,1.1,1.6,2.5,1.6,4v15.7c0,1.5-.6,2.9-1.6,4-1.1,1-2.5,1.6-4,1.6h-13.4c-1.9,0-3.4,1.5-3.4,3.4s1.5,3.4,3.4,3.4h13.4c3.3,0,6.4-1.3,8.7-3.6,2.3-2.3,3.6-5.4,3.6-8.7v-15.7c0-3.3-1.3-6.4-3.6-8.7-2.3-2.3-5.4-3.6-8.7-3.6h-58.3c-1.9,0-3.4,1.5-3.4,3.4s1.5,3.4,3.4,3.4Z"/>
|
|
41
|
+
<path class="st0" d="M337.3,209.2h-40.3c-1.5,0-2.9-.6-4-1.6-1-1.1-1.6-2.5-1.6-4v-15.7c0-1.5.6-2.9,1.6-4,1.1-1,2.5-1.6,4-1.6h24.6c1.9,0,3.4-1.5,3.4-3.4s-1.5-3.4-3.4-3.4h-24.6c-3.3,0-6.4,1.3-8.7,3.6-2.3,2.3-3.6,5.4-3.6,8.7v15.7c0,3.3,1.3,6.4,3.6,8.7,2.3,2.3,5.4,3.6,8.7,3.6h40.3c1.9,0,3.4-1.5,3.4-3.4s-1.5-3.4-3.4-3.4Z"/>
|
|
42
|
+
<path class="st0" d="M366.6,185.5l-6.6-6.6,6.6-6.6c1.3-1.3,1.3-3.4,0-4.8-1.3-1.3-3.4-1.3-4.8,0l-9,9c-1.3,1.3-1.3,3.4,0,4.8l9,9c1.3,1.3,3.4,1.3,4.8,0,1.3-1.3,1.3-3.4,0-4.8h0Z"/>
|
|
43
|
+
<path class="st0" d="M335.2,223.9l9-9c1.3-1.3,1.3-3.4,0-4.8l-9-9c-1.3-1.3-3.4-1.3-4.8,0-1.3,1.3-1.3,3.4,0,4.8l6.6,6.6-6.6,6.6c-1.3,1.3-1.3,3.4,0,4.8,1.3,1.3,3.4,1.3,4.8,0h0Z"/>
|
|
44
|
+
</g>
|
|
45
|
+
<path class="st2" d="M432.3,239.1l-27.6-116.4h28.1l8.8,48c2.6,13.8,5,28.8,6.9,40.6h.3c1.9-12.6,4.7-26.6,7.6-40.9l9.8-47.7h28l9.3,49c2.6,13.6,4.5,26.1,6.2,39h.3c1.7-13,4.3-26.6,6.7-40.4l9.5-47.7h26.8l-30,116.4h-28.5l-9.8-50.1c-2.2-11.7-4.1-22.6-5.5-35.9h-.3c-2.1,13.1-4,24.2-6.7,35.9l-11.1,50.1h-28.8Z"/>
|
|
46
|
+
</svg>
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { EventEmitter } from "node:events"
|
|
8
|
+
import Stream from "node:stream"
|
|
9
|
+
import * as Deepgram from "@deepgram/sdk"
|
|
10
|
+
import SpeechFlowNode from "./speechflow-node"
|
|
11
|
+
|
|
12
|
+
/*  SpeechFlow node for speech-to-text conversion via the Deepgram live
    transcription API: PCM audio written into the node's stream is sent to
    Deepgram and every non-empty transcript received is emitted as text.
    NOTE(review): the class is named "SpeechFlowNodeDevice" although it
    implements the Deepgram node; the name is kept, as only the default
    export is visible to importers  */
export default class SpeechFlowNodeDevice extends SpeechFlowNode {
    /*  live transcription connection (set by open(), released by close())  */
    private dg: Deepgram.LiveClient | null = null

    /*  construct node and declare its parameters
        (API key, plus model, version and language as positional arguments)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            key:      { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
            model:    { type: "string", val: "nova-2", pos: 0 }, /* FIXME: nova-3 multilingual */
            version:  { type: "string", val: "latest", pos: 1 },
            language: { type: "string", val: "de",     pos: 2 }
        })
    }

    /*  open node: connect to Deepgram and provide the audio-to-text Duplex stream.
        Throws if the configured audio format is not PCM-S16LE and rejects
        if Deepgram reports an error before the connection is open  */
    async open () {
        this.input  = "audio"
        this.output = "text"
        this.stream = null

        /*  sanity check situation  */
        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
            throw new Error("Deepgram node currently supports PCM-S16LE audio only")

        /*  connect to Deepgram API  */
        const deepgram = Deepgram.createClient(this.params.key)
        const dg = deepgram.listen.live({
            model:            this.params.model,
            version:          this.params.version,
            language:         this.params.language,
            channels:         this.config.audioChannels,
            sample_rate:      this.config.audioSampleRate,
            encoding:         "linear16",
            multichannel:     false,
            // endpointing:   false, /* FIXME: ? */
            interim_results:  false,
            smart_format:     true,
            punctuate:        true,
            filler_words:     true,
            diarize:          true,
            numerals:         true,
            paragraphs:       true,
            profanity_filter: true,
            utterances:       false
        })
        this.dg = dg

        /*  wait for the connection to open; the error hook is attached
            up-front, so a failed connect rejects open() instead of hanging
            forever (once the promise is settled, later resolve/reject
            calls are no-ops and the hook just keeps logging)  */
        await new Promise<void>((resolve, reject) => {
            dg.on(Deepgram.LiveTranscriptionEvents.Open, () => {
                this.log("info", "Deepgram: connection open")
                resolve()
            })
            dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
                this.log("error", `Deepgram: ${error}`)
                reject(error)
            })
        })

        /*  hook onto remaining Deepgram API events  */
        dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
            this.log("info", "Deepgram: connection close")
        })

        /*  provide Duplex stream and internally attach to Deepgram API  */
        const queue = new EventEmitter()
        const stream = new Stream.Duplex({
            write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                /*  pass exactly the bytes of this chunk (the backing
                    ArrayBuffer can be larger than the chunk itself);
                    empty chunks carry no audio and are not forwarded  */
                const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)
                if (data.byteLength > 0)
                    dg.send(data)
                callback()
            },
            read (size: number) {
                /*  nothing to do: transcripts are pushed as soon as they arrive  */
            }
        })

        /*  forward every transcript to the readable side through a
            permanent listener, so no transcript is lost if it arrives
            while no read request is pending  */
        queue.on("text", (text: string) => {
            stream.push(text)
        })
        dg.on(Deepgram.LiveTranscriptionEvents.Transcript, (data) => {
            const text = data.channel?.alternatives?.[0]?.transcript ?? ""
            if (text !== "")
                queue.emit("text", text)
        })
        this.stream = stream
    }

    /*  close node: destroy the stream and request closing of the Deepgram connection  */
    async close () {
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
        if (this.dg !== null) {
            this.dg.requestClose()
            this.dg = null
        }
    }
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import Stream from "node:stream"
|
|
8
|
+
import { EventEmitter } from "node:events"
|
|
9
|
+
import SpeechFlowNode from "./speechflow-node"
|
|
10
|
+
import * as DeepL from "deepl-node"
|
|
11
|
+
|
|
12
|
+
/*  SpeechFlow node for text-to-text translation via the DeepL API:
    every text chunk written into the node's stream is translated from
    the "src" to the "dst" language and the result is emitted as text  */
export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
    /*  DeepL API client (set by open(), released by close())  */
    private translator: DeepL.Translator | null = null

    /*  construct node and declare its parameters
        (API key, plus source and target language as positional arguments)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)

        this.input  = "text"
        this.output = "text"
        this.stream = null

        this.configure({
            key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPL },
            src: { type: "string", pos: 0, val: "de",    match: /^(?:de|en-US)$/ },
            dst: { type: "string", pos: 1, val: "en-US", match: /^(?:de|en-US)$/ }
        })
    }

    /*  open node: instantiate the DeepL client and provide the translating Duplex stream  */
    async open () {
        /*  instantiate DeepL API SDK  */
        const translator = new DeepL.Translator(this.params.key)
        this.translator = translator

        /*  determine languages
            NOTE(review): DeepL documents regional variants like "en-US" as
            target languages only, so the source side is given the plain
            "en" code -- confirm against the DeepL API reference  */
        const sourceLang = this.params.src === "en-US" ? "en" : this.params.src
        const targetLang = this.params.dst

        /*  provide text-to-text translation
            (falls back to the original text if no result text is available)  */
        const translate = async (text: string) => {
            const result = await translator.translateText(text, sourceLang, targetLang, {
                splitSentences: "off"
            })
            return (result?.text ?? text)
        }

        /*  establish a duplex stream and connect it to the translation  */
        const queue = new EventEmitter()
        const stream = new Stream.Duplex({
            write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                const data = chunk.toString()
                if (data === "") {
                    /*  nothing to translate  */
                    callback()
                    return
                }
                /*  success and failure handlers are passed to a single
                    then(), so the callback is invoked exactly once  */
                translate(data).then((result) => {
                    queue.emit("result", result)
                    callback()
                }, (err) => {
                    callback(err)
                })
            },
            read (size: number) {
                /*  nothing to do: results are pushed as soon as they arrive  */
            }
        })

        /*  forward every result to the readable side through a permanent
            listener, so no result is lost if it arrives while no read
            request is pending  */
        queue.on("result", (result: string) => {
            stream.push(result)
        })
        this.stream = stream
    }

    /*  close node: destroy the stream and release the DeepL client  */
    async close () {
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
        this.translator = null
    }
}
|
|
76
|
+
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import Stream from "node:stream"
|
|
8
|
+
import PortAudio from "@gpeng/naudiodon"
|
|
9
|
+
import SpeechFlowNode from "./speechflow-node"
|
|
10
|
+
import SpeechFlowUtil from "./speechflow-util"
|
|
11
|
+
|
|
12
|
+
/*  SpeechFlow node for audio device I/O via PortAudio: depending on the
    channels of the resolved device, the node acts as an audio source,
    an audio sink or both  */
export default class SpeechFlowNodeDevice extends SpeechFlowNode {
    /*  underlying PortAudio stream (set by open())  */
    private io: PortAudio.IoStreamRead | PortAudio.IoStreamWrite | PortAudio.IoStreamDuplex | null = null

    /*  construct node and declare its parameters
        (device URL and access mode as positional arguments)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            device: { type: "string", pos: 0, match: /^(.+?):(.+)$/ },
            mode:   { type: "string", pos: 1, val: "rw", match: /^(?:r|w|rw)$/ }
        })
    }

    /*  open node: resolve the device and connect to it.
        Throws if the device sample rate differs from the configured one
        or if the device has neither input nor output channels  */
    async open () {
        /*  resolve the device from its URL  */
        const device = SpeechFlowUtil.audioDeviceFromURL(this.params.mode, this.params.device)

        /*  the sample rates have to match exactly,
            as neither input nor output is resampled (for simplicity)  */
        if (device.defaultSampleRate !== this.config.audioSampleRate)
            throw new Error(`device audio sample rate ${device.defaultSampleRate} is ` +
                `incompatible with required sample rate ${this.config.audioSampleRate}`)

        /*  PortAudio options, identical for the input and the output side
            (a fresh object is produced on every call)  */
        const audioOptions = () => ({
            deviceId:     device.id,
            channelCount: this.config.audioChannels,
            sampleRate:   this.config.audioSampleRate,
            sampleFormat: this.config.audioBitDepth
        })

        /*  connect to the device
            Notice: "naudiodon" really implements Stream.{Readable,Writable,Duplex},
            but declares only the sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
            hence casting back to Stream.{Readable,Writable,Duplex} is correct  */
        const inputs  = device.maxInputChannels
        const outputs = device.maxOutputChannels
        if (inputs > 0 && outputs > 0) {
            /*  device both records and plays  */
            this.log("info", `resolved "${this.params.device}" to duplex device "${device.id}"`)
            this.input  = "audio"
            this.output = "audio"
            this.io = PortAudio.AudioIO({
                inOptions:  audioOptions(),
                outOptions: audioOptions()
            })
            this.stream = this.io as unknown as Stream.Duplex
        }
        else if (inputs > 0 && outputs === 0) {
            /*  device only records  */
            this.log("info", `resolved "${this.params.device}" to input device "${device.id}"`)
            this.input  = "none"
            this.output = "audio"
            this.io = PortAudio.AudioIO({
                inOptions: audioOptions()
            })
            this.stream = this.io as unknown as Stream.Readable
        }
        else if (inputs === 0 && outputs > 0) {
            /*  device only plays  */
            this.log("info", `resolved "${this.params.device}" to output device "${device.id}"`)
            this.input  = "audio"
            this.output = "none"
            this.io = PortAudio.AudioIO({
                outOptions: audioOptions()
            })
            this.stream = this.io as unknown as Stream.Writable
        }
        else
            throw new Error(`device "${device.id}" does not have any input or output channels`)

        /*  re-emit device errors on the node  */
        this.io.on("error", (err) => {
            this.emit("error", err)
        })
    }

    /*  close node: quit the PortAudio stream (if any)  */
    async close () {
        if (this.io === null)
            return
        this.io.quit()
    }
}
|
|
96
|
+
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import Stream from "node:stream"
|
|
8
|
+
import { EventEmitter } from "node:events"
|
|
9
|
+
|
|
10
|
+
import * as ElevenLabs from "elevenlabs"
|
|
11
|
+
import { getStreamAsBuffer } from "get-stream"
|
|
12
|
+
|
|
13
|
+
import SpeechFlowNode from "./speechflow-node"
|
|
14
|
+
|
|
15
|
+
/*
|
|
16
|
+
const elevenlabsVoices = {
|
|
17
|
+
"drew": { name: "Drew", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
|
|
18
|
+
"george": { name: "George", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
|
|
19
|
+
"bill": { name: "Bill", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
|
|
20
|
+
"daniel": { name: "Daniel", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
|
|
21
|
+
"brian": { name: "Brian", model: "eleven_turbo_v2", lang: [ "en" ] },
|
|
22
|
+
"sarah": { name: "Sarah", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
|
|
23
|
+
"racel": { name: "Racel", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
|
|
24
|
+
"grace": { name: "Grace", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
|
|
25
|
+
"matilda": { name: "Matilda", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
|
|
26
|
+
"alice": { name: "Alice", model: "eleven_turbo_v2", lang: [ "en" ] }
|
|
27
|
+
}
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
/*  SpeechFlow node for text-to-speech conversion via the ElevenLabs API:
    every text chunk written into the node's stream is converted to
    speech and the resulting audio is emitted as a single buffer  */
export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
    /*  ElevenLabs API client (set by open(), released by close())  */
    private elevenlabs: ElevenLabs.ElevenLabsClient | null = null

    /*  construct node and declare its parameters
        (API key, plus voice and language as positional arguments)
        NOTE(review): the "language" parameter is declared, but not used by open()  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            key:      { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
            voice:    { type: "string", val: "Brian", pos: 0 },
            language: { type: "string", val: "de",    pos: 1 }
        })
    }

    /*  open node: resolve the voice and provide the text-to-audio Duplex stream.
        Throws if no ElevenLabs voice has the configured name  */
    async open () {
        this.input  = "text"
        this.output = "audio"

        /*  instantiate ElevenLabs API SDK  */
        const elevenlabs = new ElevenLabs.ElevenLabsClient({
            apiKey: this.params.key
        })
        this.elevenlabs = elevenlabs

        /*  resolve the voice by its name  */
        const voices = await elevenlabs.voices.getAll()
        const voice = voices.voices.find((candidate) => candidate.name === this.params.voice)
        if (voice === undefined)
            throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)

        /*  provide text-to-speech conversion
            NOTE(review): the output format is fixed to 16KHz PCM and
            this.config.audioSampleRate is not consulted here  */
        const speechStream = (text: string) => {
            return elevenlabs.textToSpeech.convert(voice.voice_id, {
                text,
                optimize_streaming_latency: 2,
                output_format: "pcm_16000", // S16LE
                model_id: "eleven_flash_v2_5",
                /*
                voice_settings: {
                    stability: 0,
                    similarity_boost: 0
                }
                */
            }, {
                timeoutInSeconds: 30,
                maxRetries: 10
            })
        }

        /*  establish a duplex stream and connect it to the conversion  */
        const queue = new EventEmitter()
        const stream = new Stream.Duplex({
            write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                /*  chunks usually arrive as Buffers (the encoding argument
                    is then not a text encoding), so reject only
                    string chunks of a non-UTF-8 encoding, and stop
                    processing after reporting the error  */
                if (!Buffer.isBuffer(chunk) && encoding !== "utf8" && encoding !== "utf-8") {
                    callback(new Error("only text input supported by Elevenlabs node"))
                    return
                }
                const data = chunk.toString()

                /*  success and failure handlers are passed to a single
                    then(), so the callback is invoked exactly once  */
                speechStream(data)
                    .then((audio) => getStreamAsBuffer(audio))
                    .then((buffer) => {
                        queue.emit("audio", buffer)
                        callback()
                    }, (error) => {
                        callback(error)
                    })
            },
            read (size: number) {
                /*  nothing to do: audio is pushed as soon as it arrives  */
            }
        })

        /*  forward all audio to the readable side through a permanent
            listener, so no audio is lost if it arrives while no read
            request is pending  */
        queue.on("audio", (buffer: Buffer) => {
            stream.push(buffer)
        })
        this.stream = stream
    }

    /*  close node: destroy the stream and release the ElevenLabs client  */
    async close () {
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
        this.elevenlabs = null
    }
}
|
|
99
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import fs from "node:fs"
|
|
8
|
+
import SpeechFlowNode from "./speechflow-node"
|
|
9
|
+
|
|
10
|
+
/*  SpeechFlow node for file I/O: reads audio or text from a file (or
    stdin) or writes audio or text to a file (or stdout).
    NOTE(review): the class is named "SpeechFlowNodeDevice" although it
    implements the file node; the name is kept, as only the default
    export is visible to importers  */
export default class SpeechFlowNodeDevice extends SpeechFlowNode {
    /*  construct node and declare its parameters
        (path, access mode and payload type as positional arguments;
        the path "-" selects stdin/stdout)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            path: { type: "string", pos: 0 },
            mode: { type: "string", pos: 1, val: "r",     match: /^(?:r|w|rw)$/ },
            type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ }
        })
    }

    /*  open node: provide the file (or stdin/stdout) as the node's stream.
        Throws for any mode other than "r" and "w" (including the "rw"
        mode which the parameter pattern accepts)  */
    async open () {
        if (this.params.mode === "r") {
            this.output = this.params.type
            if (this.params.path === "-")
                this.stream = process.stdin
            else {
                /*  text is decoded with the configured text encoding, while
                    audio has to be read without any encoding: "binary" is
                    just an alias for "latin1" and would let the stream emit
                    strings instead of Buffers, which breaks the audio bytes
                    once those strings are encoded again downstream  */
                this.stream = fs.createReadStream(this.params.path,
                    this.params.type === "text" ? { encoding: this.config.textEncoding } : {})
            }
        }
        else if (this.params.mode === "w") {
            this.input = this.params.type
            if (this.params.path === "-")
                this.stream = process.stdout
            else
                this.stream = fs.createWriteStream(this.params.path,
                    { encoding: this.params.type === "text" ? this.config.textEncoding : "binary" })
        }
        else
            throw new Error(`invalid file mode "${this.params.mode}"`)
    }

    /*  close node: destroy the file stream
        (stdin/stdout are left untouched)  */
    async close () {
        if (this.stream !== null && this.params.path !== "-") {
            this.stream.destroy()
            this.stream = null
        }
    }
}
|
|
46
|
+
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import Stream from "node:stream"
|
|
8
|
+
import ws from "ws"
|
|
9
|
+
import ReconnWebsocket, { ErrorEvent } from "@opensumi/reconnecting-websocket"
|
|
10
|
+
import SpeechFlowNode from "./speechflow-node"
|
|
11
|
+
|
|
12
|
+
/*  SpeechFlow node for WebSocket I/O: either listens for a peer
    ("listen" mode) or connects to a peer ("connect" mode) and exchanges
    audio or text with it through the node's Duplex stream  */
export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
    /*  WebSocket server of "listen" mode (set by open(), released by close())  */
    private server: ws.WebSocketServer | null = null

    /*  WebSocket client of "connect" mode (set by open(), released by close())  */
    private client: WebSocket | null = null

    /*  construct node and declare its parameters
        (listen URL or connect URL, plus payload type)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            listen:  { type: "string", val: "",     match: /^(?:|ws:\/\/(.+?):(\d+))$/ },
            connect: { type: "string", val: "",     match: /^(?:|ws:\/\/(.+?):(\d+)(?:\/.*)?)$/ },
            type:    { type: "string", val: "text", match: /^(?:audio|text)$/ }
        })
    }

    /*  open node: start the server or the client and provide the Duplex stream.
        Throws if neither a listen nor a connect URL is configured  */
    async open () {
        this.input  = this.params.type
        this.output = this.params.type
        if (this.params.listen !== "") {
            const url = new URL(this.params.listen)

            /*  the most recently connected peer (or null if none is connected)  */
            let websocket: ws.WebSocket | null = null

            /*  provide Duplex stream: writes are sent to the current peer,
                while received messages are pushed by the connection hook below  */
            const stream = new Stream.Duplex({
                write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                    /*  pass exactly the bytes of this chunk
                        (the backing ArrayBuffer can be larger than the chunk itself)  */
                    const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)
                    if (websocket !== null) {
                        websocket.send(data, (error) => {
                            if (error) callback(error)
                            else       callback()
                        })
                    }
                    else
                        callback(new Error("still no Websocket connection available"))
                },
                read (size: number) {
                    /*  nothing to do: messages are pushed as soon as they arrive
                        (and a read request before any peer has connected
                        no longer throws)  */
                }
            })

            /*  start server and remember it, so close() is able to shut it down  */
            const server = new ws.WebSocketServer({
                host: url.hostname,
                port: Number.parseInt(url.port, 10),
                path: url.pathname
            })
            this.server = server
            server.on("listening", () => {
                this.log("info", `listening on URL ${this.params.listen}`)
            })
            server.on("connection", (socket, request) => {
                this.log("info", `connection opened on URL ${this.params.listen}`)
                websocket = socket

                /*  forward every message through a permanent listener, so no
                    message is lost if it arrives while no read request is pending  */
                socket.on("message", (data, isBinary) => {
                    stream.push(data, isBinary ? "binary" : "utf8")
                })

                /*  track the end of this particular connection
                    (but do not forget a newer peer when an older one goes away)  */
                socket.on("close", () => {
                    this.log("info", `connection closed on URL ${this.params.listen}`)
                    if (websocket === socket)
                        websocket = null
                })
                socket.on("error", (error) => {
                    this.log("error", `error on URL ${this.params.listen}: ${error.message}`)
                })
            })
            server.on("close", () => {
                this.log("info", `server closed on URL ${this.params.listen}`)
                websocket = null
            })
            server.on("error", (error) => {
                this.log("error", `error on URL ${this.params.listen}: ${error.message}`)
                websocket = null
            })
            this.stream = stream
        }
        else if (this.params.connect !== "") {
            this.client = new ReconnWebsocket(this.params.connect, [], {
                WebSocket: ws,
                WebSocketOptions: {},
                reconnectionDelayGrowFactor: 1.3,
                maxReconnectionDelay: 4000,
                minReconnectionDelay: 1000,
                connectionTimeout: 4000,
                minUptime: 5000
            })
            this.client.addEventListener("open", (ev: Event) => {
                this.log("info", `connection opened on URL ${this.params.connect}`)
            })
            this.client.addEventListener("close", (ev: Event) => {
                this.log("info", `connection closed on URL ${this.params.connect}`)
            })
            this.client.addEventListener("error", (ev: ErrorEvent) => {
                this.log("error", `error on URL ${this.params.connect}: ${ev.error.message}`)
            })
            const client = this.client
            client.binaryType = "arraybuffer"

            /*  provide Duplex stream: writes are handed to the client,
                while received messages are pushed by the message hook below  */
            const stream = new Stream.Duplex({
                write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                    /*  the former "client.OPEN" guard tested a constant and
                        hence always passed, so the data is still handed over
                        unconditionally
                        NOTE(review): presumably the reconnecting client queues
                        data while it is disconnected -- confirm against the library  */
                    const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)
                    client.send(data)
                    callback()
                },
                read (size: number) {
                    /*  nothing to do: messages are pushed as soon as they arrive  */
                }
            })

            /*  forward every message through a permanent listener, so no
                message is lost if it arrives while no read request is pending  */
            client.addEventListener("message", (ev: MessageEvent) => {
                if (ev.data instanceof ArrayBuffer)
                    /*  a plain ArrayBuffer cannot be pushed into a byte stream, so wrap it  */
                    stream.push(Buffer.from(ev.data), "binary")
                else
                    stream.push(ev.data, "utf8")
            })
            this.stream = stream
        }
        else
            throw new Error("neither listen nor connect mode requested")
    }

    /*  close node: shut down the server (including its peers)
        or the client and destroy the stream  */
    async close () {
        if (this.server !== null) {
            const server = this.server

            /*  close the connected peers first, as the server
                does not finish closing while connections remain  */
            for (const socket of server.clients)
                socket.close()
            await new Promise<void>((resolve, reject) => {
                server.close((error) => {
                    if (error) reject(error)
                    else       resolve()
                })
            })
            this.server = null
        }
        if (this.client !== null) {
            this.client.close()
            this.client = null
        }
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
|
|
140
|
+
|