speechflow 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <svg id="Layer_1" xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="0 0 580 286">
3
+ <!-- Generator: Adobe Illustrator 29.4.0, SVG Export Plug-In . SVG Version: 2.1.0 Build 152) -->
4
+ <defs>
5
+ <style>
6
+ .st0 {
7
+ fill: #fff;
8
+ }
9
+
10
+ .st0, .st1 {
11
+ fill-rule: evenodd;
12
+ }
13
+
14
+ .st1 {
15
+ fill: #b06820;
16
+ }
17
+
18
+ .st2 {
19
+ fill: #369;
20
+ }
21
+ </style>
22
+ </defs>
23
+ <g>
24
+ <path d="M23.1,90.8c4.1,2.8,9.8,4.8,15.9,4.8,10.8,0,17.5-6.1,17.5-15.1s-4.2-12.9-14.7-17c-11.7-4.1-18.7-10.1-18.7-20s8.8-18.4,21.1-18.4,12,1.8,14.4,3.4l-1.9,4.3c-1.9-1.4-6.6-3.4-12.8-3.4-11.7,0-15.7,7.5-15.7,13.3,0,8.1,4.6,12.2,15.1,16.3,12,4.8,18.3,10.2,18.3,21.1s-7.5,19.9-23.1,19.9-13.6-2-17.3-4.8l1.9-4.3Z"/>
25
+ <path d="M70,63.7c0-6.5-.2-11.6-.4-16.5h4.8l.4,9.8h.2c3.7-6.9,10-11,19.1-11,13.2,0,22.2,11,22.2,26.2s-11,27.9-23.5,27.9-14-3.3-17.6-9.6h-.2v29.7h-5.1v-56.6ZM75,78.5c0,1.4.2,2.8.4,4.1,2.2,8,9,13.1,17,13.1,11.9,0,18.6-9.6,18.6-23.4s-6.5-22.2-18.2-22.2-14.9,5.3-17.1,13.8c-.3,1.4-.8,3-.8,4.4v10.1Z"/>
26
+ <path d="M124.8,72.5c0,16.6,9,23.1,19.7,23.1s11.4-1.5,14.3-2.9l1.3,4.1c-1.9,1.1-7.2,3.2-16.3,3.2-14.9,0-24.1-10.8-24.1-26s9.9-28,23.3-28,20.1,15.9,20.1,23.4,0,2.3-.2,3.1h-38.1ZM157.8,68.4c.1-7.2-2.9-18.2-15.4-18.2s-16.4,10.2-17.2,18.2h32.6Z"/>
27
+ <path d="M171.7,72.5c0,16.6,9,23.1,19.7,23.1s11.4-1.5,14.3-2.9l1.3,4.1c-1.9,1.1-7.2,3.2-16.3,3.2-14.9,0-24.1-10.8-24.1-26s9.9-28,23.3-28,20.1,15.9,20.1,23.4,0,2.3-.2,3.1h-38.1ZM204.6,68.4c.1-7.2-2.9-18.2-15.4-18.2s-16.4,10.2-17.2,18.2h32.6Z"/>
28
+ <path d="M253.4,96.6c-2.3,1.2-7.6,3.3-15.1,3.3-15,0-24.9-10.8-24.9-26.4s11.2-27.6,26.7-27.6,11.5,1.8,13.6,3.2l-1.8,4.2c-2.6-1.4-6.6-3-12.3-3-13.9,0-21,10.7-21,22.8s8.5,22.3,20.5,22.3,10.1-1.7,12.9-2.9l1.4,4Z"/>
29
+ <path d="M260.4,22.4h5.1v34h.2c1.5-2.9,4-5.7,7.1-7.5,2.9-1.8,6.5-2.9,10.4-2.9s17.9,2.5,17.9,21.7v31.2h-5.1v-30.6c0-9.4-3.7-17.9-14.2-17.9s-13.5,5.3-15.6,11.7c-.5,1.5-.8,3-.8,5.1v31.7h-5.1V22.4Z"/>
30
+ </g>
31
+ <g>
32
+ <path class="st2" d="M102.1,122.7h71.1v21.6h-44.7v26.6h41.8v21.4h-41.8v46.8h-26.4v-116.4Z"/>
33
+ <path class="st2" d="M184.5,122.7h26.4v94.3h46.3v22.1h-72.7v-116.4Z"/>
34
+ </g>
35
+ <path class="st1" d="M329.8,110.5c-43.9,0-79.7,31.5-79.7,70.3s6.5,32.2,18.3,44.8c2.3,9.6-.5,19.8-7.5,26.9-2.9,2.9-.9,8,3.3,8,13.4,0,26.2-5.2,35.8-14.5,9.5,3.4,19.5,5.1,29.8,5.1,43.9,0,80.3-31.5,80.3-70.3s-36.4-70.3-80.3-70.3Z"/>
36
+ <g id="Icon">
37
+ <path class="st0" d="M318.3,174.4v9c0,2.7,1.1,5.2,3,7.1s4.5,3,7.1,3h9c2.7,0,5.2-1.1,7.1-3s3-4.5,3-7.1v-9c0-2.7-1.1-5.2-3-7.1-1.9-1.9-4.5-3-7.1-3h-9c-2.7,0-5.2,1.1-7.1,3s-3,4.5-3,7.1Z"/>
38
+ <path class="st0" d="M284.7,140.8v9c0,2.7,1.1,5.2,3,7.1s4.5,3,7.1,3h9c2.7,0,5.2-1.1,7.1-3s3-4.5,3-7.1v-9c0-2.7-1.1-5.2-3-7.1s-4.5-3-7.1-3h-9c-2.7,0-5.2,1.1-7.1,3s-3,4.5-3,7.1Z"/>
39
+ <path class="st0" d="M351.9,208v9c0,2.7,1.1,5.2,3,7.1,1.9,1.9,4.5,3,7.1,3h9c2.7,0,5.2-1.1,7.1-3,1.9-1.9,3-4.5,3-7.1v-9c0-2.7-1.1-5.2-3-7.1s-4.5-3-7.1-3h-9c-2.7,0-5.2,1.1-7.1,3s-3,4.5-3,7.1Z"/>
40
+ <path class="st0" d="M310.4,148.7h58.3c1.5,0,2.9.6,4,1.6,1,1.1,1.6,2.5,1.6,4v15.7c0,1.5-.6,2.9-1.6,4-1.1,1-2.5,1.6-4,1.6h-13.4c-1.9,0-3.4,1.5-3.4,3.4s1.5,3.4,3.4,3.4h13.4c3.3,0,6.4-1.3,8.7-3.6,2.3-2.3,3.6-5.4,3.6-8.7v-15.7c0-3.3-1.3-6.4-3.6-8.7-2.3-2.3-5.4-3.6-8.7-3.6h-58.3c-1.9,0-3.4,1.5-3.4,3.4s1.5,3.4,3.4,3.4Z"/>
41
+ <path class="st0" d="M337.3,209.2h-40.3c-1.5,0-2.9-.6-4-1.6-1-1.1-1.6-2.5-1.6-4v-15.7c0-1.5.6-2.9,1.6-4,1.1-1,2.5-1.6,4-1.6h24.6c1.9,0,3.4-1.5,3.4-3.4s-1.5-3.4-3.4-3.4h-24.6c-3.3,0-6.4,1.3-8.7,3.6-2.3,2.3-3.6,5.4-3.6,8.7v15.7c0,3.3,1.3,6.4,3.6,8.7,2.3,2.3,5.4,3.6,8.7,3.6h40.3c1.9,0,3.4-1.5,3.4-3.4s-1.5-3.4-3.4-3.4Z"/>
42
+ <path class="st0" d="M366.6,185.5l-6.6-6.6,6.6-6.6c1.3-1.3,1.3-3.4,0-4.8-1.3-1.3-3.4-1.3-4.8,0l-9,9c-1.3,1.3-1.3,3.4,0,4.8l9,9c1.3,1.3,3.4,1.3,4.8,0,1.3-1.3,1.3-3.4,0-4.8h0Z"/>
43
+ <path class="st0" d="M335.2,223.9l9-9c1.3-1.3,1.3-3.4,0-4.8l-9-9c-1.3-1.3-3.4-1.3-4.8,0-1.3,1.3-1.3,3.4,0,4.8l6.6,6.6-6.6,6.6c-1.3,1.3-1.3,3.4,0,4.8,1.3,1.3,3.4,1.3,4.8,0h0Z"/>
44
+ </g>
45
+ <path class="st2" d="M432.3,239.1l-27.6-116.4h28.1l8.8,48c2.6,13.8,5,28.8,6.9,40.6h.3c1.9-12.6,4.7-26.6,7.6-40.9l9.8-47.7h28l9.3,49c2.6,13.6,4.5,26.1,6.2,39h.3c1.7-13,4.3-26.6,6.7-40.4l9.5-47.7h26.8l-30,116.4h-28.5l-9.8-50.1c-2.2-11.7-4.1-22.6-5.5-35.9h-.3c-2.1,13.1-4,24.2-6.7,35.9l-11.1,50.1h-28.8Z"/>
46
+ </svg>
@@ -0,0 +1,102 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ import { EventEmitter } from "node:events"
8
+ import Stream from "node:stream"
9
+ import * as Deepgram from "@deepgram/sdk"
10
+ import SpeechFlowNode from "./speechflow-node"
11
+
12
/*  SpeechFlow node: speech-to-text through the Deepgram live transcription API.
    The node consumes audio chunks on its writable side and emits the
    transcribed text on its readable side.  */
export default class SpeechFlowNodeDevice extends SpeechFlowNode {
    /*  live Deepgram connection (null as long as the node is not open)  */
    private dg: Deepgram.LiveClient | null = null

    /*  declare the node parameters (API key, model, model version, language)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            key:      { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPGRAM },
            model:    { type: "string", val: "nova-2", pos: 0 }, /* FIXME: nova-3 multilingual */
            version:  { type: "string", val: "latest", pos: 1 },
            language: { type: "string", val: "de",     pos: 2 }
        })
    }

    /*  connect to Deepgram and provide the audio-to-text Duplex stream;
        throws if the audio format is unsupported or the connection
        cannot be established  */
    async open () {
        this.input  = "audio"
        this.output = "text"
        this.stream = null

        /* sanity check situation */
        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
            throw new Error("Deepgram node currently supports PCM-S16LE audio only")

        /* connect to Deepgram API */
        const queue = new EventEmitter()
        const deepgram = Deepgram.createClient(this.params.key)
        const dg = deepgram.listen.live({
            model:            this.params.model,
            version:          this.params.version,
            language:         this.params.language,
            channels:         this.config.audioChannels,
            sample_rate:      this.config.audioSampleRate,
            encoding:         "linear16",
            multichannel:     false,
            // endpointing:   false, /* FIXME: ? */
            interim_results:  false,
            smart_format:     true,
            punctuate:        true,
            filler_words:     true,
            diarize:          true,
            numerals:         true,
            paragraphs:       true,
            profanity_filter: true,
            utterances:       false,
        })
        this.dg = dg

        /* provide Duplex stream and internally attach to Deepgram API */
        const stream = new Stream.Duplex({
            write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                /* empty chunks carry no audio and are just acknowledged */
                if (chunk.byteLength > 0) {
                    const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)
                    try {
                        dg.send(data)
                    }
                    catch (err) {
                        callback(err instanceof Error ? err : new Error(String(err)))
                        return
                    }
                }
                callback()
            },
            read () {
                /* nothing to do: transcripts are pushed as soon as they arrive */
            }
        })

        /* forward every transcript to the readable side
           (a persistent listener, so no transcript is lost when
           it arrives while no read request is pending) */
        queue.on("text", (text: string) => {
            if (!stream.destroyed)
                stream.push(text)
        })

        /* hook onto Deepgram API transcript events */
        dg.on(Deepgram.LiveTranscriptionEvents.Transcript, (data) => {
            /* the list of alternatives can be empty, so access it defensively */
            const text = data.channel?.alternatives?.[0]?.transcript ?? ""
            if (text === "")
                return
            queue.emit("text", text)
        })

        /* wait for the connection to open, but fail instead of waiting
           forever if the connection errors or closes beforehand
           (rejecting after the promise was resolved is a no-op, so
           these handlers just keep on logging afterwards) */
        await new Promise<void>((resolve, reject) => {
            dg.on(Deepgram.LiveTranscriptionEvents.Open, () => {
                this.log("info", "Deepgram: connection open")
                resolve()
            })
            dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
                this.log("info", "Deepgram: connection close")
                reject(new Error("Deepgram: connection closed before it was opened"))
            })
            dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
                this.log("error", `Deepgram: ${error}`)
                reject(error)
            })
        })

        this.stream = stream
    }

    /*  tear down the stream and the Deepgram connection (safe to call repeatedly)  */
    async close () {
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
        if (this.dg !== null) {
            this.dg.requestClose()
            this.dg = null
        }
    }
}
@@ -0,0 +1,76 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ import Stream from "node:stream"
8
+ import { EventEmitter } from "node:events"
9
+ import SpeechFlowNode from "./speechflow-node"
10
+ import * as DeepL from "deepl-node"
11
+
12
/*  SpeechFlow node: text-to-text translation through the DeepL API.
    Every text chunk written to the node is translated from the
    source to the destination language and emitted again as text.  */
export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
    /*  DeepL API client (null as long as the node is not open)  */
    private translator: DeepL.Translator | null = null

    /*  declare the node parameters (API key, source and destination language)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)

        this.input  = "text"
        this.output = "text"
        this.stream = null

        this.configure({
            key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPL },
            src: { type: "string", pos: 0, val: "de",    match: /^(?:de|en-US)$/ },
            dst: { type: "string", pos: 1, val: "en-US", match: /^(?:de|en-US)$/ }
        })
    }

    /*  instantiate the DeepL client and provide the translating Duplex stream  */
    async open () {
        /* instantiate DeepL API SDK */
        const translator = new DeepL.Translator(this.params.key)
        this.translator = translator

        /* DeepL accepts regional variants like "en-US" as target language only,
           so reduce the source language to its non-regional code ("en") */
        const src = String(this.params.src).replace(/-.*$/, "") as DeepL.SourceLanguageCode
        const dst = this.params.dst

        /* provide text-to-text translation
           (falls back to the input text if no translation is returned) */
        const translate = async (text: string) => {
            const result = await translator.translateText(text, src, dst, {
                splitSentences: "off"
            })
            return (result?.text ?? text)
        }

        /* establish a duplex stream and connect it to the translation */
        const queue = new EventEmitter()
        const stream = new Stream.Duplex({
            write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                const data = chunk.toString()
                if (data === "") {
                    /* nothing to translate */
                    callback()
                    return
                }
                translate(data).then((result) => {
                    queue.emit("result", result)
                    callback()
                }, (err) => {
                    callback(err)
                })
            },
            read () {
                /* nothing to do: results are pushed as soon as they are available */
            }
        })

        /* forward every translation to the readable side
           (a persistent listener, so no result is lost when it
           becomes available while no read request is pending) */
        queue.on("result", (result: string) => {
            if (!stream.destroyed)
                stream.push(result)
        })

        this.stream = stream
    }

    /*  tear down the stream and drop the DeepL client  */
    async close () {
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
        this.translator = null
    }
}
76
+
@@ -0,0 +1,96 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ import Stream from "node:stream"
8
+ import PortAudio from "@gpeng/naudiodon"
9
+ import SpeechFlowNode from "./speechflow-node"
10
+ import SpeechFlowUtil from "./speechflow-util"
11
+
12
/*  SpeechFlow node: audio input/output through a PortAudio device.
    Depending on the channels the resolved device offers, the node
    acts as an audio source, an audio sink, or both.  */
export default class SpeechFlowNodeDevice extends SpeechFlowNode {
    /*  PortAudio I/O handle (null as long as the node is not open)  */
    private io: PortAudio.IoStreamRead | PortAudio.IoStreamWrite | PortAudio.IoStreamDuplex | null = null

    /*  declare the node parameters (device URL and access mode)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            device: { type: "string", pos: 0,            match: /^(.+?):(.+)$/ },
            mode:   { type: "string", pos: 1, val: "rw", match: /^(?:r|w|rw)$/ }
        })
    }

    /*  resolve the device and attach its PortAudio stream to the node;
        throws if the sample rate is incompatible or the device has
        neither input nor output channels  */
    async open () {
        /* determine device */
        const device = SpeechFlowUtil.audioDeviceFromURL(this.params.mode, this.params.device)

        /* sanity check sample rate compatibility
           (we still do not resample in input/output for simplification reasons) */
        if (device.defaultSampleRate !== this.config.audioSampleRate)
            throw new Error(`device audio sample rate ${device.defaultSampleRate} is ` +
                `incompatible with required sample rate ${this.config.audioSampleRate}`)

        /* the input and output side use the same audio options
           (a fresh object is created for each use) */
        const audioOptions = () => ({
            deviceId:     device.id,
            channelCount: this.config.audioChannels,
            sampleRate:   this.config.audioSampleRate,
            sampleFormat: this.config.audioBitDepth
        })

        /* establish device connection
           Notice: "naudiodon" actually implements Stream.{Readable,Writable,Duplex}, but
           declares just its sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
           so it is correct to cast it back to Stream.{Readable,Writable,Duplex} */
        if (device.maxInputChannels > 0 && device.maxOutputChannels > 0) {
            this.log("info", `resolved "${this.params.device}" to duplex device "${device.id}"`)
            this.input  = "audio"
            this.output = "audio"
            this.io = PortAudio.AudioIO({
                inOptions:  audioOptions(),
                outOptions: audioOptions()
            })
            this.stream = this.io as unknown as Stream.Duplex
        }
        else if (device.maxInputChannels > 0 && device.maxOutputChannels === 0) {
            this.log("info", `resolved "${this.params.device}" to input device "${device.id}"`)
            this.input  = "none"
            this.output = "audio"
            this.io = PortAudio.AudioIO({
                inOptions: audioOptions()
            })
            this.stream = this.io as unknown as Stream.Readable
        }
        else if (device.maxInputChannels === 0 && device.maxOutputChannels > 0) {
            this.log("info", `resolved "${this.params.device}" to output device "${device.id}"`)
            this.input  = "audio"
            this.output = "none"
            this.io = PortAudio.AudioIO({
                outOptions: audioOptions()
            })
            this.stream = this.io as unknown as Stream.Writable
        }
        else
            throw new Error(`device "${device.id}" does not have any input or output channels`)

        /* pass-through errors */
        this.io.on("error", (err) => {
            this.emit("error", err)
        })
    }

    /*  shut down the PortAudio stream and drop all references to it,
        so a repeated close does not quit the device a second time  */
    async close () {
        if (this.io !== null) {
            this.io.quit()
            this.io     = null
            this.stream = null
        }
    }
}
96
+
@@ -0,0 +1,99 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ import Stream from "node:stream"
8
+ import { EventEmitter } from "node:events"
9
+
10
+ import * as ElevenLabs from "elevenlabs"
11
+ import { getStreamAsBuffer } from "get-stream"
12
+
13
+ import SpeechFlowNode from "./speechflow-node"
14
+
15
+ /*
16
+ const elevenlabsVoices = {
17
+ "drew": { name: "Drew", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
18
+ "george": { name: "George", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
19
+ "bill": { name: "Bill", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
20
+ "daniel": { name: "Daniel", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
21
+ "brian": { name: "Brian", model: "eleven_turbo_v2", lang: [ "en" ] },
22
+ "sarah": { name: "Sarah", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
23
+ "racel": { name: "Racel", model: "eleven_multilingual_v2", lang: [ "en", "de" ] },
24
+ "grace": { name: "Grace", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
25
+ "matilda": { name: "Matilda", model: "eleven_multilingual_v1", lang: [ "en", "de" ] },
26
+ "alice": { name: "Alice", model: "eleven_turbo_v2", lang: [ "en" ] }
27
+ }
28
+ */
29
+
30
/*  SpeechFlow node: text-to-speech through the ElevenLabs API.
    Every text chunk written to the node is converted to speech and
    the resulting audio is emitted on the readable side.  */
export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
    /*  ElevenLabs API client (null as long as the node is not open)  */
    private elevenlabs: ElevenLabs.ElevenLabsClient | null = null

    /*  declare the node parameters (API key, voice name, language)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            key:      { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
            voice:    { type: "string", val: "Brian", pos: 0 },
            language: { type: "string", val: "de",    pos: 1 }
        })
    }

    /*  connect to ElevenLabs, resolve the configured voice and provide
        the text-to-audio Duplex stream; throws if the voice is unknown  */
    async open () {
        this.input  = "text"
        this.output = "audio"

        /* instantiate ElevenLabs API SDK */
        const elevenlabs = new ElevenLabs.ElevenLabsClient({
            apiKey: this.params.key
        })
        this.elevenlabs = elevenlabs

        /* resolve the voice name to its voice id */
        const voices = await elevenlabs.voices.getAll()
        const voice = voices.voices.find((voice) => voice.name === this.params.voice)
        if (voice === undefined)
            throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
        const voiceId = voice.voice_id

        /* provide text-to-speech conversion */
        const speechStream = (text: string) => {
            return elevenlabs.textToSpeech.convert(voiceId, {
                text,
                optimize_streaming_latency: 2,
                output_format: "pcm_16000", // S16LE
                model_id: "eleven_flash_v2_5",
                /*
                voice_settings: {
                    stability: 0,
                    similarity_boost: 0
                }
                */
            }, {
                timeoutInSeconds: 30,
                maxRetries: 10
            })
        }

        /* establish a duplex stream and connect it to the conversion */
        const queue = new EventEmitter()
        const stream = new Stream.Duplex({
            write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                /* Notice: the encoding is not checked here, as Node.js reports
                   "buffer" for all Buffer chunks (and, by default, converts string
                   chunks to Buffers), so a check against "utf8" would reject
                   every single chunk */
                const data = chunk.toString()
                if (data === "") {
                    /* nothing to speak */
                    callback()
                    return
                }

                /* the callback is invoked exactly once: on success or on failure */
                speechStream(data)
                    .then((audio) => getStreamAsBuffer(audio))
                    .then((buffer) => {
                        queue.emit("audio", buffer)
                        callback()
                    }, (error) => {
                        callback(error)
                    })
            },
            read () {
                /* nothing to do: audio is pushed as soon as it is available */
            }
        })

        /* forward every audio buffer to the readable side
           (a persistent listener, so no audio is lost when it
           becomes available while no read request is pending) */
        queue.on("audio", (buffer: Buffer) => {
            if (!stream.destroyed)
                stream.push(buffer)
        })

        this.stream = stream
    }

    /*  tear down the stream and drop the ElevenLabs client  */
    async close () {
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
        this.elevenlabs = null
    }
}
99
+
@@ -0,0 +1,46 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ import fs from "node:fs"
8
+ import SpeechFlowNode from "./speechflow-node"
9
+
10
/*  SpeechFlow node: audio or text input/output through a file.
    The path "-" selects standard input (read mode) or standard
    output (write mode) instead of a regular file.  */
export default class SpeechFlowNodeDevice extends SpeechFlowNode {
    /*  declare the node parameters (file path, access mode, payload type)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            path: { type: "string", pos: 0 },
            mode: { type: "string", pos: 1, val: "r",     match: /^(?:r|w|rw)$/ },
            type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ }
        })
    }

    /*  open the file (or standard I/O) as the stream of the node;
        throws for every mode other than "r" and "w"  */
    async open () {
        /* text is transferred in the configured text encoding, while audio
           has to be transferred as raw Buffers: an encoding like "binary"
           would make a read stream deliver latin1 strings, which get
           re-encoded (and this way corrupted) once written elsewhere */
        const encoding = this.params.type === "text" ? this.config.textEncoding : undefined

        if (this.params.mode === "r") {
            this.output = this.params.type
            if (this.params.path === "-")
                this.stream = process.stdin
            else
                this.stream = fs.createReadStream(this.params.path, { encoding })
        }
        else if (this.params.mode === "w") {
            this.input = this.params.type
            if (this.params.path === "-")
                this.stream = process.stdout
            else
                this.stream = fs.createWriteStream(this.params.path, { encoding })
        }
        else
            throw new Error(`invalid file mode "${this.params.mode}"`)
    }

    /*  close the file stream (standard I/O streams are shared with
        the process and hence are released, but never destroyed)  */
    async close () {
        if (this.stream !== null) {
            if (this.params.path !== "-")
                this.stream.destroy()
            this.stream = null
        }
    }
}
46
+
@@ -0,0 +1,140 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ import Stream from "node:stream"
8
+ import ws from "ws"
9
+ import ReconnWebsocket, { ErrorEvent } from "@opensumi/reconnecting-websocket"
10
+ import SpeechFlowNode from "./speechflow-node"
11
+
12
/*  SpeechFlow node: audio or text input/output through a WebSocket.
    The node either listens for a peer (server mode) or connects
    to a peer (client mode, with automatic reconnection).  */
export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
    /*  WebSocket server of the listen mode (null if not used)  */
    private server: ws.WebSocketServer | null = null

    /*  WebSocket client of the connect mode (null if not used)  */
    private client: WebSocket | null = null

    /*  declare the node parameters (listen URL, connect URL, payload type)  */
    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
        super(id, opts, args)
        this.configure({
            listen:  { type: "string", val: "",     match: /^(?:|ws:\/\/(.+?):(\d+))$/ },
            connect: { type: "string", val: "",     match: /^(?:|ws:\/\/(.+?):(\d+)(?:\/.*)?)$/ },
            type:    { type: "string", val: "text", match: /^(?:audio|text)$/ }
        })
    }

    /*  start the server or the client and provide the Duplex stream;
        throws if neither a listen nor a connect URL is configured  */
    async open () {
        this.input  = this.params.type
        this.output = this.params.type
        if (this.params.listen !== "") {
            /* the connection of the most recent peer (null if no peer is connected) */
            let websocket: ws.WebSocket | null = null

            /* provide Duplex stream: writes go to the current peer,
               reads are served by pushing the incoming messages */
            const stream = new Stream.Duplex({
                write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                    const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)
                    if (websocket !== null) {
                        websocket.send(data, (error) => {
                            if (error) callback(error)
                            else       callback()
                        })
                    }
                    else
                        callback(new Error("still no Websocket connection available"))
                },
                read () {
                    /* nothing to do: messages are pushed as soon as they arrive
                       (a read request before a peer has connected is no error) */
                }
            })

            /* start the server and remember it, so close() can shut it down */
            const url = new URL(this.params.listen)
            const server = new ws.WebSocketServer({
                host: url.hostname,
                port: Number.parseInt(url.port),
                path: url.pathname
            })
            this.server = server
            server.on("listening", () => {
                this.log("info", `listening on URL ${this.params.listen}`)
            })
            server.on("connection", (connection) => {
                this.log("info", `connection opened on URL ${this.params.listen}`)
                websocket = connection
                connection.on("message", (data, isBinary) => {
                    if (!stream.destroyed)
                        stream.push(data, isBinary ? "binary" : "utf8")
                })
                connection.on("close", () => {
                    this.log("info", `connection closed on URL ${this.params.listen}`)
                    /* do not forget a newer connection which replaced this one */
                    if (websocket === connection)
                        websocket = null
                })
                connection.on("error", (error) => {
                    this.log("error", `error on URL ${this.params.listen}: ${error.message}`)
                })
            })
            server.on("close", () => {
                this.log("info", `connection closed on URL ${this.params.listen}`)
                websocket = null
            })
            server.on("error", (error) => {
                this.log("error", `error on URL ${this.params.listen}: ${error.message}`)
                websocket = null
            })
            this.stream = stream
        }
        else if (this.params.connect !== "") {
            /* connect to the peer (and reconnect automatically) */
            this.client = new ReconnWebsocket(this.params.connect, [], {
                WebSocket:                   ws,
                WebSocketOptions:            {},
                reconnectionDelayGrowFactor: 1.3,
                maxReconnectionDelay:        4000,
                minReconnectionDelay:        1000,
                connectionTimeout:           4000,
                minUptime:                   5000
            })
            this.client.addEventListener("open", (ev: Event) => {
                this.log("info", `connection opened on URL ${this.params.connect}`)
            })
            this.client.addEventListener("close", (ev: Event) => {
                this.log("info", `connection closed on URL ${this.params.connect}`)
            })
            this.client.addEventListener("error", (ev: ErrorEvent) => {
                this.log("error", `error on URL ${this.params.connect}: ${ev.error.message}`)
            })
            const client = this.client
            client.binaryType = "arraybuffer"

            /* provide Duplex stream: writes go to the peer,
               reads are served by pushing the incoming messages */
            const stream = new Stream.Duplex({
                write (chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null | undefined) => void) {
                    const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)
                    /* Notice: "OPEN" is just a constant, so the
                       actual connection state has to be compared against it */
                    if (client.readyState === client.OPEN) {
                        client.send(data)
                        callback()
                    }
                    else
                        callback(new Error("still no Websocket connection available"))
                },
                read () {
                    /* nothing to do: messages are pushed as soon as they arrive */
                }
            })
            client.addEventListener("message", (ev: MessageEvent) => {
                if (stream.destroyed)
                    return
                if (ev.data instanceof ArrayBuffer)
                    /* a plain ArrayBuffer is not accepted by a stream, so wrap it */
                    stream.push(Buffer.from(ev.data))
                else
                    stream.push(ev.data, "utf8")
            })
            this.stream = stream
        }
        else
            throw new Error("neither listen nor connect mode requested")
    }

    /*  shut down the server and/or the client and tear down the stream  */
    async close () {
        if (this.server !== null) {
            const server = this.server
            this.server = null

            /* the server does not close its connections on its own, and
               would not finish closing as long as they are still open */
            for (const connection of server.clients)
                connection.close()
            await new Promise<void>((resolve, reject) => {
                server.close((error) => {
                    if (error) reject(error)
                    else       resolve()
                })
            })
        }
        if (this.client !== null) {
            this.client.close()
            this.client = null
        }
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
140
+