speechflow 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -0
- package/dst/speechflow-node-deepgram.d.ts +10 -0
- package/dst/speechflow-node-deepgram.js +44 -23
- package/dst/speechflow-node-deepl.d.ts +10 -0
- package/dst/speechflow-node-deepl.js +30 -12
- package/dst/speechflow-node-device.d.ts +11 -0
- package/dst/speechflow-node-device.js +73 -14
- package/dst/speechflow-node-elevenlabs.d.ts +10 -0
- package/dst/speechflow-node-elevenlabs.js +14 -2
- package/dst/speechflow-node-ffmpeg.d.ts +11 -0
- package/dst/speechflow-node-ffmpeg.js +114 -0
- package/dst/speechflow-node-file.d.ts +9 -0
- package/dst/speechflow-node-file.js +71 -13
- package/dst/speechflow-node-gemma.d.ts +11 -0
- package/dst/speechflow-node-gemma.js +152 -0
- package/dst/speechflow-node-websocket.d.ts +11 -0
- package/dst/speechflow-node-websocket.js +34 -6
- package/dst/speechflow-node.d.ts +38 -0
- package/dst/speechflow-node.js +28 -10
- package/dst/speechflow.d.ts +1 -0
- package/dst/speechflow.js +128 -43
- package/etc/tsconfig.json +2 -0
- package/package.json +25 -11
- package/src/speechflow-node-deepgram.ts +55 -24
- package/src/speechflow-node-deepl.ts +38 -16
- package/src/speechflow-node-device.ts +88 -14
- package/src/speechflow-node-elevenlabs.ts +19 -2
- package/src/speechflow-node-ffmpeg.ts +122 -0
- package/src/speechflow-node-file.ts +76 -14
- package/src/speechflow-node-gemma.ts +169 -0
- package/src/speechflow-node-websocket.ts +52 -13
- package/src/speechflow-node.ts +43 -21
- package/src/speechflow.ts +144 -47
- package/dst/speechflow-util.js +0 -37
- package/src/speechflow-util.ts +0 -36
package/src/speechflow-node-device.ts
@@ -4,38 +4,102 @@
 ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
 
+/* standard dependencies */
 import Stream from "node:stream"
+
+/* external dependencies */
 import PortAudio from "@gpeng/naudiodon"
+
+/* internal dependencies */
 import SpeechFlowNode from "./speechflow-node"
-import SpeechFlowUtil from "./speechflow-util"
 
+/* SpeechFlow node for device access */
 export default class SpeechFlowNodeDevice extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "device"
+
+    /* internal state */
     private io: PortAudio.IoStreamRead | PortAudio.IoStreamWrite | PortAudio.IoStreamDuplex | null = null
+
+    /* construct node */
     constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
         super(id, opts, args)
+
+        /* declare node configuration parameters */
         this.configure({
             device: { type: "string", pos: 0, match: /^(.+?):(.+)$/ },
             mode: { type: "string", pos: 1, val: "rw", match: /^(?:r|w|rw)$/ }
         })
+
+        /* declare node input/output format */
+        if (this.params.mode === "rw") {
+            this.input = "audio"
+            this.output = "audio"
+        }
+        else if (this.params.mode === "r") {
+            this.input = "none"
+            this.output = "audio"
+        }
+        else if (this.params.mode === "w") {
+            this.input = "audio"
+            this.output = "none"
+        }
+    }
+
+    /* INTERNAL: utility function for finding audio device by pseudo-URL notation */
+    private audioDeviceFromURL (mode: "any" | "r" | "w" | "rw", url: string) {
+        /* parse URL */
+        const m = url.match(/^(.+?):(.+)$/)
+        if (m === null)
+            throw new Error(`invalid audio device URL "${url}"`)
+        const [ , type, name ] = m
+
+        /* determine audio API */
+        const apis = PortAudio.getHostAPIs()
+        const api = apis.HostAPIs.find((api) => api.type.toLowerCase() === type.toLowerCase())
+        if (!api)
+            throw new Error(`invalid audio API type "${type}"`)
+
+        /* determine device of audio API */
+        const devices = PortAudio.getDevices()
+        const device = devices.find((device) => {
+            return (
+                (  (mode === "r" && device.maxInputChannels > 0)
+                || (mode === "w" && device.maxOutputChannels > 0)
+                || (mode === "rw" && device.maxInputChannels > 0 && device.maxOutputChannels > 0)
+                || (mode === "any" && (device.maxInputChannels > 0 || device.maxOutputChannels > 0)))
+                && device.name.match(name)
+                && device.hostAPIName === api.name
+            )
+        })
+        if (!device)
+            throw new Error(`invalid audio device "${name}" (of audio API type "${type}")`)
+        return device
     }
+
+    /* open node */
     async open () {
         /* determine device */
-        const device =
+        const device = this.audioDeviceFromURL(this.params.mode, this.params.device)
 
         /* sanity check sample rate compatibility
            (we still do not resample in input/output for simplification reasons) */
         if (device.defaultSampleRate !== this.config.audioSampleRate)
-            throw new Error(`device
+            throw new Error(`audio device sample rate ${device.defaultSampleRate} is ` +
                 `incompatible with required sample rate ${this.config.audioSampleRate}`)
 
         /* establish device connection
            Notice: "naudion" actually implements Stream.{Readable,Writable,Duplex}, but
            declares just its sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
            so it is correct to cast it back to Stream.{Readable,Writable,Duplex} */
-
+        /* FIXME: the underlying PortAudio outputs verbose/debugging messages */
+        if (this.params.mode === "rw") {
+            /* input/output device */
+            if (device.maxInputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any input channels (required by read/write mode)`)
+            if (device.maxOutputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any output channels (required by read/write mode)`)
             this.log("info", `resolved "${this.params.device}" to duplex device "${device.id}"`)
-            this.input = "audio"
-            this.output = "audio"
             this.io = PortAudio.AudioIO({
                 inOptions: {
                     deviceId: device.id,
@@ -52,10 +116,11 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
             })
             this.stream = this.io as unknown as Stream.Duplex
         }
-        else if (
+        else if (this.params.mode === "r") {
+            /* input device */
+            if (device.maxInputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any input channels (required by read mode)`)
             this.log("info", `resolved "${this.params.device}" to input device "${device.id}"`)
-            this.input = "none"
-            this.output = "audio"
             this.io = PortAudio.AudioIO({
                 inOptions: {
                     deviceId: device.id,
@@ -66,10 +131,11 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
             })
             this.stream = this.io as unknown as Stream.Readable
         }
-        else if (
+        else if (this.params.mode === "w") {
+            /* output device */
+            if (device.maxOutputChannels === 0)
+                throw new Error(`device "${device.id}" does not have any output channels (required by write mode)`)
             this.log("info", `resolved "${this.params.device}" to output device "${device.id}"`)
-            this.input = "audio"
-            this.output = "none"
             this.io = PortAudio.AudioIO({
                 outOptions: {
                     deviceId: device.id,
@@ -83,14 +149,22 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
         else
             throw new Error(`device "${device.id}" does not have any input or output channels`)
 
-        /* pass-through errors */
+        /* pass-through PortAudio errors */
        this.io.on("error", (err) => {
             this.emit("error", err)
         })
+
+        /* start PortAudio */
+        this.io.start()
     }
+
+    /* close node */
     async close () {
-
+        /* shutdown PortAudio */
+        if (this.io !== null) {
+            this.io.quit()
+            this.io = null
+        }
    }
 }
 
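
Aside: the new audioDeviceFromURL() helper above resolves a "<api-type>:<device-name>" pseudo-URL against the PortAudio host APIs and devices. A minimal standalone sketch (not part of the package; it only reuses the @gpeng/naudiodon calls already visible in the hunk above) of how one could list the pseudo-URLs such a lookup would accept:

```ts
/* sketch: enumerate "<api-type>:<device-name>" pseudo-URLs
   (assumes @gpeng/naudiodon is installed and PortAudio can be initialized) */
import PortAudio from "@gpeng/naudiodon"

const apis = PortAudio.getHostAPIs().HostAPIs
for (const device of PortAudio.getDevices()) {
    const api = apis.find((api) => api.name === device.hostAPIName)
    if (api !== undefined)
        console.log(`${api.type.toLowerCase()}:${device.name}`)
}
```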

package/src/speechflow-node-elevenlabs.ts
@@ -4,12 +4,15 @@
 ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
 
+/* standard dependencies */
 import Stream from "node:stream"
 import { EventEmitter } from "node:events"
 
+/* external dependencies */
 import * as ElevenLabs from "elevenlabs"
 import { getStreamAsBuffer } from "get-stream"
 
+/* internal dependencies */
 import SpeechFlowNode from "./speechflow-node"
 
 /*
@@ -28,19 +31,30 @@ const elevenlabsVoices = {
 */
 
 export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "elevenlabs"
+
+    /* internal state */
     private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
+
+    /* construct node */
     constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
         super(id, opts, args)
+
+        /* declare node configuration parameters */
         this.configure({
             key: { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
             voice: { type: "string", val: "Brian", pos: 0 },
             language: { type: "string", val: "de", pos: 1 }
         })
-
-
+
+        /* declare node input/output format */
         this.input = "text"
         this.output = "audio"
+    }
 
+    /* open node */
+    async open () {
         this.elevenlabs = new ElevenLabs.ElevenLabsClient({
             apiKey: this.params.key
         })
@@ -89,7 +103,10 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
             }
         })
     }
+
+    /* close node */
     async close () {
+        /* destroy stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null

package/src/speechflow-node-ffmpeg.ts (new file)
@@ -0,0 +1,122 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import FFmpeg from "@rse/ffmpeg"
+import { Converter as FFmpegStream } from "ffmpeg-stream"
+
+/* internal dependencies */
+import SpeechFlowNode from "./speechflow-node"
+
+/* SpeechFlow node for FFmpeg */
+export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "ffmpeg"
+
+    /* internal state */
+    private ffmpegBinary = FFmpeg.supported ? FFmpeg.binary : "ffmpeg"
+    private ffmpeg: FFmpegStream | null = null
+
+    /* construct node */
+    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            src: { type: "string", pos: 0, val: "pcm", match: /^(?:pcm|wav|mp3|opus)$/ },
+            dst: { type: "string", pos: 1, val: "wav", match: /^(?:pcm|wav|mp3|opus)$/ }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* sanity check situation */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination formats should not be the same")
+
+        /* instantiate FFmpeg sub-process */
+        this.ffmpeg = new FFmpegStream(this.ffmpegBinary)
+        const streamInput = this.ffmpeg.createInputStream({
+            /* FFmpeg input options */
+            "fflags": "nobuffer",
+            "flags": "low_delay",
+            "probesize": 32,
+            "analyzeduration": 0,
+            ...(this.params.src === "pcm" ? {
+                "f": "s16le",
+                "ar": this.config.audioSampleRate,
+                "ac": this.config.audioChannels
+            } : {}),
+            ...(this.params.src === "wav" ? {
+                "f": "wav"
+            } : {}),
+            ...(this.params.src === "mp3" ? {
+                "f": "mp3"
+            } : {}),
+            ...(this.params.src === "opus" ? {
+                "f": "opus"
+            } : {})
+        })
+        const streamOutput = this.ffmpeg.createOutputStream({
+            /* FFmpeg output options */
+            "flush_packets": 1,
+            ...(this.params.dst === "pcm" ? {
+                "c:a": "pcm_s16le",
+                "ar": this.config.audioSampleRate,
+                "ac": this.config.audioChannels,
+                "f": "s16le",
+            } : {}),
+            ...(this.params.dst === "wav" ? {
+                "f": "wav"
+            } : {}),
+            ...(this.params.dst === "mp3" ? {
+                "c:a": "libmp3lame",
+                "b:a": "192k",
+                "f": "mp3"
+            } : {}),
+            ...(this.params.dst === "opus" ? {
+                "acodec": "libopus",
+                "f": "opus"
+            } : {})
+        })
+        this.ffmpeg.run()
+
+        /* establish a duplex stream and connect it to FFmpeg */
+        this.stream = Stream.Duplex.from({
+            readable: streamOutput,
+            writable: streamInput
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* close duplex stream */
+        if (this.stream !== null) {
+            await new Promise<void>((resolve) => {
+                if (this.stream instanceof Stream.Duplex)
+                    this.stream.end(() => { resolve() })
+                else
+                    resolve()
+            })
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* shutdown FFmpeg */
+        if (this.ffmpeg !== null) {
+            this.ffmpeg.kill()
+            this.ffmpeg = null
+        }
+    }
+}
+
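
For orientation, the ffmpeg-stream Converter pattern used by this new node (createInputStream/createOutputStream/run) can be exercised standalone. A minimal sketch, not part of the package; the 48 kHz mono s16le parameters are illustrative assumptions, not SpeechFlow's configured values:

```ts
/* sketch: pipe raw PCM (s16le, 48 kHz, mono) from stdin through ffmpeg and emit WAV on stdout */
import { Converter } from "ffmpeg-stream"

const converter = new Converter()  /* assumes an "ffmpeg" binary in PATH */
const input  = converter.createInputStream({ f: "s16le", ar: 48000, ac: 1 })
const output = converter.createOutputStream({ f: "wav" })
process.stdin.pipe(input)
output.pipe(process.stdout)
converter.run().catch((err) => { console.error(err); process.exit(1) })
```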

package/src/speechflow-node-file.ts
@@ -4,41 +4,103 @@
 ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
 */
 
+/* standard dependencies */
 import fs from "node:fs"
+import Stream from "node:stream"
+
+/* internal dependencies */
 import SpeechFlowNode from "./speechflow-node"
 
-
+/* SpeechFlow node for file access */
+export default class SpeechFlowNodeFile extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "file"
+
+    /* construct node */
     constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
         super(id, opts, args)
+
+        /* declare node configuration parameters */
         this.configure({
             path: { type: "string", pos: 0 },
             mode: { type: "string", pos: 1, val: "r", match: /^(?:r|w|rw)$/ },
             type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ }
         })
+
+        /* declare node input/output format */
+        if (this.params.mode === "rw") {
+            this.input = this.params.type
+            this.output = this.params.type
+        }
+        else if (this.params.mode === "r") {
+            this.input = "none"
+            this.output = this.params.type
+        }
+        else if (this.params.mode === "w") {
+            this.input = this.params.type
+            this.output = "none"
+        }
     }
+
+    /* open node */
     async open () {
-
-
-        if (this.params.path === "-")
+        const encoding = this.params.type === "text" ? this.config.textEncoding : "binary"
+        if (this.params.mode === "rw") {
+            if (this.params.path === "-") {
+                /* standard I/O */
+                process.stdin.setEncoding(encoding)
+                process.stdout.setEncoding(encoding)
+                this.stream = Stream.Duplex.from({
+                    readable: process.stdin,
+                    writable: process.stdout
+                })
+            }
+            else {
+                /* file I/O */
+                this.stream = Stream.Duplex.from({
+                    readable: fs.createReadStream(this.params.path, { encoding }),
+                    writable: fs.createWriteStream(this.params.path, { encoding })
+                })
+            }
+        }
+        else if (this.params.mode === "r") {
+            if (this.params.path === "-") {
+                /* standard I/O */
+                process.stdin.setEncoding(encoding)
                 this.stream = process.stdin
-
-
-
+            }
+            else {
+                /* file I/O */
+                this.stream = fs.createReadStream(this.params.path, { encoding })
+            }
         }
         else if (this.params.mode === "w") {
-
-
+            if (this.params.path === "-") {
+                /* standard I/O */
+                process.stdout.setEncoding(encoding)
                 this.stream = process.stdout
-
-
-
+            }
+            else {
+                /* file I/O */
+                this.stream = fs.createWriteStream(this.params.path, { encoding })
+            }
         }
         else
             throw new Error(`invalid file mode "${this.params.mode}"`)
     }
+
+    /* close node */
     async close () {
-
-
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await new Promise<void>((resolve) => {
+                if (this.stream instanceof Stream.Writable || this.stream instanceof Stream.Duplex)
+                    this.stream.end(() => { resolve() })
+                else
+                    resolve()
+            })
+            if (this.params.path !== "-")
+                this.stream.destroy()
             this.stream = null
         }
     }
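
The new "rw" mode above leans on Stream.Duplex.from({ readable, writable }) (Node.js >= 16.8) to present a separate read stream and write stream as one duplex. A minimal standalone sketch, not part of the package; the file names are hypothetical:

```ts
/* sketch: wrap a read stream and a write stream into one duplex, then copy data through it */
import fs from "node:fs"
import Stream from "node:stream"

const duplex = Stream.Duplex.from({
    readable: fs.createReadStream("in.txt", { encoding: "utf8" }),
    writable: fs.createWriteStream("out.txt", { encoding: "utf8" })
})
duplex.on("data", (chunk) => { duplex.write(chunk) })  /* reads come from in.txt  */
duplex.on("end",  () => { duplex.end() })              /* writes land in out.txt */
```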

package/src/speechflow-node-gemma.ts (new file)
@@ -0,0 +1,169 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+import { EventEmitter } from "node:events"
+
+/* external dependencies */
+import { Ollama } from "ollama"
+
+/* internal dependencies */
+import SpeechFlowNode from "./speechflow-node"
+
+/* internal utility types */
+type ConfigEntry = { systemPrompt: string, chat: Array<{ role: string, content: string }> }
+type Config = { [ key: string ]: ConfigEntry }
+
+/* SpeechFlow node for Gemma/Ollama text-to-text translation */
+export default class SpeechFlowNodeGemma extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "gemma"
+
+    /* internal state */
+    private ollama: Ollama | null = null
+
+    /* internal LLM setup */
+    private setup: Config = {
+        /* English (EN) to German (DE) translation */
+        "en-de": {
+            systemPrompt:
+                "You are a translator.\n" +
+                "Output only the requested text.\n" +
+                "Do not use markdown.\n" +
+                "Do not chat.\n" +
+                "Do not show any explanations.\n" +
+                "Do not show any introduction.\n" +
+                "Do not show any preamble.\n" +
+                "Do not show any prolog.\n" +
+                "Do not show any epilog.\n" +
+                "Get to the point.\n" +
+                "Directly translate text from Enlish (EN) to German (DE) language.\n",
+            chat: [
+                { role: "user", content: "I love my wife." },
+                { role: "system", content: "Ich liebe meine Frau." },
+                { role: "user", content: "The weather is wonderful." },
+                { role: "system", content: "Das Wetter ist wunderschön." },
+                { role: "user", content: "The live is awesome." },
+                { role: "system", content: "Das Leben ist einfach großartig." }
+            ]
+        },
+
+        /* German (DE) to English (EN) translation */
+        "de-en": {
+            systemPrompt:
+                "You are a translator.\n" +
+                "Output only the requested text.\n" +
+                "Do not use markdown.\n" +
+                "Do not chat.\n" +
+                "Do not show any explanations. \n" +
+                "Do not show any introduction.\n" +
+                "Do not show any preamble. \n" +
+                "Do not show any prolog. \n" +
+                "Do not show any epilog. \n" +
+                "Get to the point.\n" +
+                "Directly translate text from German (DE) to English (EN) language.\n",
+            chat: [
+                { role: "user", content: "Ich liebe meine Frau." },
+                { role: "system", content: "I love my wife." },
+                { role: "user", content: "Das Wetter ist wunderschön." },
+                { role: "system", content: "The weather is wonderful." },
+                { role: "user", content: "Das Leben ist einfach großartig." },
+                { role: "system", content: "The live is awesome." }
+            ]
+        }
+    }
+
+    /* construct node */
+    constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?:\d+$/ },
+            src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
+            dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
+        })
+
+        /* sanity check situation */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination languages cannot be the same")
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "text"
+    }
+
+    /* open node */
+    async open () {
+        /* instantiate Ollama API */
+        this.ollama = new Ollama({ host: this.params.api })
+
+        /* provide text-to-text translation */
+        const translate = async (text: string) => {
+            const key = `${this.params.src}-${this.params.dst}`
+            const cfg = this.setup[key]
+            const response = await this.ollama!.chat({
+                model: "gemma3:4b-it-q4_K_M",
+                messages: [
+                    { role: "system", content: cfg.systemPrompt },
+                    ...cfg.chat,
+                    { role: "user", content: text }
+                ],
+                keep_alive: "10m",
+                options: {
+                    repeat_penalty: 1.1,
+                    temperature: 0.7,
+                    seed: 1,
+                    top_k: 10,
+                    top_p: 0.5
+                }
+            })
+            return response.message.content
+        }
+
+        /* establish a duplex stream and connect it to Ollama */
+        const queue = new EventEmitter()
+        this.stream = new Stream.Duplex({
+            write (chunk: Buffer, encoding, callback) {
+                const data = chunk.toString()
+                if (data === "") {
+                    queue.emit("result", "")
+                    callback()
+                }
+                else {
+                    translate(data).then((result) => {
+                        queue.emit("result", result)
+                        callback()
+                    }).catch((err) => {
+                        callback(err)
+                    })
+                }
+            },
+            read (size) {
+                queue.once("result", (result: string) => {
+                    this.push(result)
+                })
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* shutdown Ollama */
+        if (this.ollama !== null) {
+            this.ollama.abort()
+            this.ollama = null
+        }
+    }
+}
+
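
The new "gemma" node's translation boils down to a single Ollama chat call. A standalone sketch of that call, not part of the package; it assumes a local Ollama server on the default port with the gemma3:4b-it-q4_K_M model already pulled, and uses a condensed system prompt for brevity:

```ts
/* sketch: ask a local Ollama instance for a one-shot DE -> EN translation */
import { Ollama } from "ollama"

const ollama = new Ollama({ host: "http://127.0.0.1:11434" })
ollama.chat({
    model: "gemma3:4b-it-q4_K_M",
    messages: [
        { role: "system", content: "You are a translator. Directly translate text from German (DE) to English (EN)." },
        { role: "user",   content: "Das Wetter ist wunderschön." }
    ]
}).then((response) => {
    console.log(response.message.content)  /* expected: an English translation */
})
```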