speechflow 0.9.4 → 0.9.7
This diff shows the changes between publicly released versions of this package, as published to its public registry, and is provided for informational purposes only.
- package/CHANGELOG.md +19 -0
- package/README.md +227 -54
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
- package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
- package/dst/speechflow-node-a2a-wav.d.ts +11 -0
- package/dst/speechflow-node-a2a-wav.js +170 -0
- package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
- package/dst/speechflow-node-a2t-deepgram.js +220 -0
- package/dst/speechflow-node-deepgram.d.ts +3 -1
- package/dst/speechflow-node-deepgram.js +86 -22
- package/dst/speechflow-node-deepl.d.ts +3 -1
- package/dst/speechflow-node-deepl.js +25 -20
- package/dst/speechflow-node-device.d.ts +3 -1
- package/dst/speechflow-node-device.js +53 -2
- package/dst/speechflow-node-elevenlabs.d.ts +4 -1
- package/dst/speechflow-node-elevenlabs.js +88 -49
- package/dst/speechflow-node-ffmpeg.d.ts +3 -1
- package/dst/speechflow-node-ffmpeg.js +42 -4
- package/dst/speechflow-node-file.d.ts +3 -1
- package/dst/speechflow-node-file.js +84 -13
- package/dst/speechflow-node-format.d.ts +11 -0
- package/dst/speechflow-node-format.js +80 -0
- package/dst/speechflow-node-gemma.d.ts +3 -1
- package/dst/speechflow-node-gemma.js +84 -23
- package/dst/speechflow-node-mqtt.d.ts +13 -0
- package/dst/speechflow-node-mqtt.js +181 -0
- package/dst/speechflow-node-opus.d.ts +12 -0
- package/dst/speechflow-node-opus.js +135 -0
- package/dst/speechflow-node-subtitle.d.ts +12 -0
- package/dst/speechflow-node-subtitle.js +96 -0
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
- package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
- package/dst/speechflow-node-t2t-deepl.js +133 -0
- package/dst/speechflow-node-t2t-format.d.ts +11 -0
- package/dst/speechflow-node-t2t-format.js +80 -0
- package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
- package/dst/speechflow-node-t2t-gemma.js +213 -0
- package/dst/speechflow-node-t2t-opus.d.ts +12 -0
- package/dst/speechflow-node-t2t-opus.js +135 -0
- package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
- package/dst/speechflow-node-t2t-subtitle.js +96 -0
- package/dst/speechflow-node-trace.d.ts +11 -0
- package/dst/speechflow-node-trace.js +88 -0
- package/dst/speechflow-node-wav.d.ts +11 -0
- package/dst/speechflow-node-wav.js +170 -0
- package/dst/speechflow-node-websocket.d.ts +3 -1
- package/dst/speechflow-node-websocket.js +149 -49
- package/dst/speechflow-node-whisper-common.d.ts +34 -0
- package/dst/speechflow-node-whisper-common.js +7 -0
- package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-ggml.js +97 -0
- package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
- package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker.js +116 -0
- package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker2.js +82 -0
- package/dst/speechflow-node-whisper.d.ts +19 -0
- package/dst/speechflow-node-whisper.js +604 -0
- package/dst/speechflow-node-x2x-trace.d.ts +11 -0
- package/dst/speechflow-node-x2x-trace.js +88 -0
- package/dst/speechflow-node-xio-device.d.ts +13 -0
- package/dst/speechflow-node-xio-device.js +205 -0
- package/dst/speechflow-node-xio-file.d.ts +11 -0
- package/dst/speechflow-node-xio-file.js +176 -0
- package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
- package/dst/speechflow-node-xio-mqtt.js +181 -0
- package/dst/speechflow-node-xio-websocket.d.ts +13 -0
- package/dst/speechflow-node-xio-websocket.js +275 -0
- package/dst/speechflow-node.d.ts +25 -7
- package/dst/speechflow-node.js +74 -9
- package/dst/speechflow-utils.d.ts +23 -0
- package/dst/speechflow-utils.js +194 -0
- package/dst/speechflow.js +146 -43
- package/etc/biome.jsonc +12 -4
- package/etc/stx.conf +65 -0
- package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
- package/package.json +49 -31
- package/sample.yaml +61 -23
- package/src/lib.d.ts +6 -1
- package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
- package/src/speechflow-node-a2a-wav.ts +143 -0
- package/src/speechflow-node-a2t-deepgram.ts +199 -0
- package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
- package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
- package/src/speechflow-node-t2t-format.ts +85 -0
- package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
- package/src/speechflow-node-t2t-opus.ts +111 -0
- package/src/speechflow-node-t2t-subtitle.ts +101 -0
- package/src/speechflow-node-x2x-trace.ts +92 -0
- package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
- package/src/speechflow-node-xio-file.ts +153 -0
- package/src/speechflow-node-xio-mqtt.ts +154 -0
- package/src/speechflow-node-xio-websocket.ts +248 -0
- package/src/speechflow-node.ts +78 -13
- package/src/speechflow-utils.ts +212 -0
- package/src/speechflow.ts +150 -43
- package/etc/nps.yaml +0 -40
- package/src/speechflow-node-deepgram.ts +0 -133
- package/src/speechflow-node-elevenlabs.ts +0 -116
- package/src/speechflow-node-file.ts +0 -108
- package/src/speechflow-node-websocket.ts +0 -179
package/src/speechflow-node-t2a-elevenlabs.ts (new file)

@@ -0,0 +1,160 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
+import { getStreamAsBuffer } from "get-stream"
+import SpeexResampler from "speex-resampler"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* SpeechFlow node for Elevenlabs text-to-speech conversion */
+export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "elevenlabs"
+
+    /* internal state */
+    private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
+    private static speexInitialized = false
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            key: { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
+            voice: { type: "string", val: "Brian", pos: 0, match: /^(?:.+)$/ },
+            language: { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ },
+            speed: { type: "number", val: 1.05, pos: 2, match: (n: number) => n >= 0.7 && n <= 1.2 },
+            optimize: { type: "string", val: "latency", pos: 3, match: /^(?:latency|quality)$/ }
+        })
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* establish ElevenLabs API connection */
+        this.elevenlabs = new ElevenLabs.ElevenLabsClient({
+            apiKey: this.params.key
+        })
+
+        /* determine maximum sample rate of ElevenLabs tier */
+        const maxSampleRates = {
+            "free": 16000,
+            "starter": 22050,
+            "creator": 24000,
+            "independent_publisher": 44100,
+            "growing_business": 44100,
+            "enterprise": 44100
+        }
+        const sub = await this.elevenlabs.user.subscription.get()
+        const tier = (sub.tier ?? "free") as keyof typeof maxSampleRates
+        this.log("info", `determined ElevenLabs tier: "${tier}"`)
+        let maxSampleRate = 16000
+        if (maxSampleRates[tier] !== undefined)
+            maxSampleRate = maxSampleRates[tier]
+        this.log("info", `determined maximum audio sample rate: ${maxSampleRate}`)
+
+        /* determine voice for text-to-speech operation
+           (for details see https://elevenlabs.io/text-to-speech) */
+        const voices = await this.elevenlabs.voices.getAll()
+        let voice = voices.voices.find((voice) => voice.name === this.params.voice)
+        if (voice === undefined) {
+            voice = voices.voices.find((voice) => voice.name!.startsWith(this.params.voice))
+            if (voice === undefined)
+                throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
+        }
+        const info = Object.keys(voice.labels ?? {}).length > 0 ?
+            (", " + Object.entries(voice.labels!)
+                .map(([ key, val ]) => `${key}: "${val}"`).join(", ")) : ""
+        this.log("info", `selected voice: name: "${voice.name}"${info}`)
+
+        /* perform text-to-speech operation with Elevenlabs API */
+        const model = this.params.optimize === "quality" ?
+            "eleven_multilingual_v2" :
+            "eleven_flash_v2_5"
+        const speechStream = (text: string) => {
+            this.log("info", `ElevenLabs: send text "${text}"`)
+            return this.elevenlabs!.textToSpeech.convert(voice.voiceId, {
+                text,
+                modelId: model,
+                languageCode: this.params.language,
+                outputFormat: `pcm_${maxSampleRate}` as ElevenLabs.ElevenLabs.OutputFormat,
+                seed: 815, /* arbitrary, but fixated by us */
+                voiceSettings: {
+                    speed: this.params.speed
+                }
+            }, {
+                timeoutInSeconds: 30,
+                maxRetries: 10
+            })
+        }
+
+        /* establish resampler from ElevenLabs's maximum 24Khz
+           output to our standard audio sample rate (48KHz) */
+        if (!SpeechFlowNodeElevenlabs.speexInitialized) {
+            /* at least once initialize resampler */
+            await SpeexResampler.initPromise
+            SpeechFlowNodeElevenlabs.speexInitialized = true
+        }
+        const resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
+
+        /* create transform stream and connect it to the ElevenLabs API */
+        const log = (level: string, msg: string) => { this.log(level, msg) }
+        this.stream = new Stream.Transform({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    speechStream(chunk.payload).then((stream) => {
+                        getStreamAsBuffer(stream).then((buffer) => {
+                            const bufferResampled = resampler.processChunk(buffer)
+                            log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
+                            const chunkNew = chunk.clone()
+                            chunkNew.type = "audio"
+                            chunkNew.payload = bufferResampled
+                            this.push(chunkNew)
+                            callback()
+                        }).catch((error) => {
+                            callback(error)
+                        })
+                    }).catch((error) => {
+                        callback(error)
+                    })
+                }
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* destroy stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* destroy ElevenLabs API */
+        if (this.elevenlabs !== null)
+            this.elevenlabs = null
+    }
+}
+
package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts}

@@ -6,13 +6,12 @@
 
 /* standard dependencies */
 import Stream from "node:stream"
-import { EventEmitter } from "node:events"
 
 /* external dependencies */
 import * as DeepL from "deepl-node"
 
 /* internal dependencies */
-import SpeechFlowNode
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 
 /* SpeechFlow node for DeepL text-to-text translations */
 export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
@@ -23,17 +22,21 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
     private deepl: DeepL.Translator | null = null
 
     /* construct node */
-    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
-        super(id, opts, args)
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
 
         /* declare node configuration parameters */
         this.configure({
             key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPL },
-            src: { type: "string", pos: 0, val: "de", match: /^(?:de|en
-            dst: { type: "string", pos: 1, val: "en
+            src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
+            dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
             optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
         })
 
+        /* sanity check situation */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination languages cannot be the same")
+
         /* declare node input/output format */
         this.input = "text"
         this.output = "text"
@@ -46,7 +49,9 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
 
         /* provide text-to-text translation */
         const translate = async (text: string) => {
-            const
+            const src = this.params.src === "en" ? "en-US" : this.params.src
+            const dst = this.params.dst === "en" ? "en-US" : this.params.dst
+            const result = await this.deepl!.translateText(text, src, dst, {
                 splitSentences: "off",
                 modelType: this.params.optimize === "latency" ?
                     "latency_optimized" : "prefer_quality_optimized",
@@ -57,32 +62,38 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
         }
 
         /* establish a duplex stream and connect it to DeepL translation */
-
-
-
-
-
-
-                callback()
-            }
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
                 else {
-
-
+                    if (chunk.payload === "") {
+                        this.push(chunk)
                         callback()
-                    }
-
-
+                    }
+                    else {
+                        translate(chunk.payload).then((payload) => {
+                            const chunkNew = chunk.clone()
+                            chunkNew.payload = payload
+                            this.push(chunkNew)
+                            callback()
+                        }).catch((err) => {
+                            callback(err)
+                        })
+                    }
                 }
             },
-
-
-
-        })
+            final (callback) {
+                this.push(null)
+                callback()
             }
         })
     }
 
-    /*
+    /* close node */
     async close () {
         /* close stream */
         if (this.stream !== null) {
package/src/speechflow-node-t2t-format.ts (new file)

@@ -0,0 +1,85 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import wrapText from "wrap-text"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* SpeechFlow node for text-to-text formatting */
+export default class SpeechFlowNodeFormat extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "format"
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            width: { type: "number", val: 80 }
+        })
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "text"
+    }
+
+    /* open node */
+    async open () {
+        /* provide text-to-text formatter */
+        const format = async (text: string) => {
+            text = wrapText(text, this.params.width)
+            text = text.replace(/([^\n])$/, "$1\n")
+            return text
+        }
+
+        /* establish a duplex stream and connect it to DeepL translation */
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    if (chunk.payload === "") {
+                        this.push(chunk)
+                        callback()
+                    }
+                    else {
+                        format(chunk.payload).then((payload) => {
+                            const chunkNew = chunk.clone()
+                            chunkNew.payload = payload
+                            this.push(chunkNew)
+                            callback()
+                        }).catch((err) => {
+                            callback(err)
+                        })
+                    }
+                }
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* open node */
+    async close () {
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts}

@@ -6,13 +6,12 @@
 
 /* standard dependencies */
 import Stream from "node:stream"
-import { EventEmitter } from "node:events"
 
 /* external dependencies */
 import { Ollama } from "ollama"
 
 /* internal dependencies */
-import SpeechFlowNode
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 
 /* internal utility types */
 type ConfigEntry = { systemPrompt: string, chat: Array<{ role: string, content: string }> }
@@ -28,6 +27,69 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
 
     /* internal LLM setup */
     private setup: Config = {
+        /* English (EN) spellchecking only */
+        "en-en": {
+            systemPrompt:
+                "You are a proofreader and spellchecker for English.\n" +
+                "Output only the corrected text.\n" +
+                "Do NOT use markdown.\n" +
+                "Do NOT give any explanations.\n" +
+                "Do NOT give any introduction.\n" +
+                "Do NOT give any comments.\n" +
+                "Do NOT give any preamble.\n" +
+                "Do NOT give any prolog.\n" +
+                "Do NOT give any epilog.\n" +
+                "Do NOT change the gammar.\n" +
+                "Do NOT use synonyms for words.\n" +
+                "Keep all words.\n" +
+                "Fill in missing commas.\n" +
+                "Fill in missing points.\n" +
+                "Fill in missing question marks.\n" +
+                "Fill in missing hyphens.\n" +
+                "Focus ONLY on the word spelling.\n" +
+                "The text you have to correct is:\n",
+            chat: [
+                { role: "user", content: "I luve my wyfe" },
+                { role: "system", content: "I love my wife." },
+                { role: "user", content: "The weether is wunderfull!" },
+                { role: "system", content: "The weather is wonderful!" },
+                { role: "user", content: "The live awesome but I'm hungry." },
+                { role: "system", content: "The live is awesome, but I'm hungry." }
+            ]
+        },
+
+        /* German (DE) spellchecking only */
+        "de-de": {
+            systemPrompt:
+                "Du bist ein Korrekturleser und Rechtschreibprüfer für Deutsch.\n" +
+                "Gib nur den korrigierten Text aus.\n" +
+                "Benutze KEIN Markdown.\n" +
+                "Gib KEINE Erklärungen.\n" +
+                "Gib KEINE Einleitung.\n" +
+                "Gib KEINE Kommentare.\n" +
+                "Gib KEINE Preamble.\n" +
+                "Gib KEINEN Prolog.\n" +
+                "Gib KEINEN Epilog.\n" +
+                "Ändere NICHT die Grammatik.\n" +
+                "Verwende KEINE Synonyme für Wörter.\n" +
+                "Behalte alle Wörter bei.\n" +
+                "Füge fehlende Kommas ein.\n" +
+                "Füge fehlende Punkte ein.\n" +
+                "Füge fehlende Fragezeichen ein.\n" +
+                "Füge fehlende Bindestriche ein.\n" +
+                "Füge fehlende Gedankenstriche ein.\n" +
+                "Fokussiere dich NUR auf die Rechtschreibung der Wörter.\n" +
+                "Der von dir zu korrigierende Text ist:\n",
+            chat: [
+                { role: "user", content: "Ich ljebe meine Frao" },
+                { role: "system", content: "Ich liebe meine Frau." },
+                { role: "user", content: "Die Wedter ist wunderschoen." },
+                { role: "system", content: "Das Wetter ist wunderschön." },
+                { role: "user", content: "Das Leben einfach großartig aber ich bin hungrig." },
+                { role: "system", content: "Das Leben ist einfach großartig, aber ich bin hungrig." }
+            ]
+        },
+
         /* English (EN) to German (DE) translation */
         "en-de": {
             systemPrompt:
@@ -78,8 +140,8 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
     }
 
     /* construct node */
-    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
-        super(id, opts, args)
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
 
         /* declare node configuration parameters */
         this.configure({
@@ -88,10 +150,6 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
             dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
         })
 
-        /* sanity check situation */
-        if (this.params.src === this.params.dst)
-            throw new Error("source and destination languages cannot be the same")
-
         /* declare node input/output format */
         this.input = "text"
         this.output = "text"
@@ -126,27 +184,33 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
         }
 
         /* establish a duplex stream and connect it to Ollama */
-
-
-
-
-
-
-                callback()
-            }
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
                 else {
-
-
+                    if (chunk.payload === "") {
+                        this.push(chunk)
                         callback()
-                    }
-
-
+                    }
+                    else {
+                        translate(chunk.payload).then((payload) => {
+                            const chunkNew = chunk.clone()
+                            chunkNew.payload = payload
+                            this.push(chunkNew)
+                            callback()
+                        }).catch((err) => {
+                            callback(err)
+                        })
+                    }
                 }
            },
-
-
-
-        })
+            final (callback) {
+                this.push(null)
+                callback()
             }
         })
     }
package/src/speechflow-node-t2t-opus.ts (new file)

@@ -0,0 +1,111 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import path from "node:path"
+import Stream from "node:stream"
+
+/* external dependencies */
+import * as Transformers from "@huggingface/transformers"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* SpeechFlow node for OPUS text-to-text translation */
+export default class SpeechFlowNodeOPUS extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "opus"
+
+    /* internal state */
+    private translator: Transformers.TranslationPipeline | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
+            dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
+        })
+
+        /* sanity check situation */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination languages cannot be the same")
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "text"
+    }
+
+    /* open node */
+    async open () {
+        /* instantiate OPUS */
+        const model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
+        this.translator = await Transformers.pipeline("translation", model, {
+            cache_dir: path.join(this.config.cacheDir, "opus"),
+            dtype: "q4",
+            device: "gpu"
+        })
+        if (this.translator === null)
+            throw new Error("failed to instantiate translator pipeline")
+
+        /* provide text-to-text translation */
+        const translate = async (text: string) => {
+            const result = await this.translator!(text)
+            return Array.isArray(result) ?
+                (result[0] as Transformers.TranslationSingle).translation_text :
+                (result as Transformers.TranslationSingle).translation_text
+        }
+
+        /* establish a duplex stream and connect it to Ollama */
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    if (chunk.payload === "") {
+                        this.push(chunk)
+                        callback()
+                    }
+                    else {
+                        translate(chunk.payload).then((payload) => {
+                            const chunkNew = chunk.clone()
+                            chunkNew.payload = payload
+                            this.push(chunkNew)
+                            callback()
+                        }).catch((err) => {
+                            callback(err)
+                        })
+                    }
+                }
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* shutdown OPUS */
+        if (this.translator !== null) {
+            this.translator.dispose()
+            this.translator = null
+        }
+    }
+}
+