speechflow 0.9.4 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +227 -54
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
- package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
- package/dst/speechflow-node-a2a-wav.d.ts +11 -0
- package/dst/speechflow-node-a2a-wav.js +170 -0
- package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
- package/dst/speechflow-node-a2t-deepgram.js +220 -0
- package/dst/speechflow-node-deepgram.d.ts +3 -1
- package/dst/speechflow-node-deepgram.js +86 -22
- package/dst/speechflow-node-deepl.d.ts +3 -1
- package/dst/speechflow-node-deepl.js +25 -20
- package/dst/speechflow-node-device.d.ts +3 -1
- package/dst/speechflow-node-device.js +53 -2
- package/dst/speechflow-node-elevenlabs.d.ts +4 -1
- package/dst/speechflow-node-elevenlabs.js +88 -49
- package/dst/speechflow-node-ffmpeg.d.ts +3 -1
- package/dst/speechflow-node-ffmpeg.js +42 -4
- package/dst/speechflow-node-file.d.ts +3 -1
- package/dst/speechflow-node-file.js +84 -13
- package/dst/speechflow-node-format.d.ts +11 -0
- package/dst/speechflow-node-format.js +80 -0
- package/dst/speechflow-node-gemma.d.ts +3 -1
- package/dst/speechflow-node-gemma.js +84 -23
- package/dst/speechflow-node-mqtt.d.ts +13 -0
- package/dst/speechflow-node-mqtt.js +181 -0
- package/dst/speechflow-node-opus.d.ts +12 -0
- package/dst/speechflow-node-opus.js +135 -0
- package/dst/speechflow-node-subtitle.d.ts +12 -0
- package/dst/speechflow-node-subtitle.js +96 -0
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
- package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
- package/dst/speechflow-node-t2t-deepl.js +133 -0
- package/dst/speechflow-node-t2t-format.d.ts +11 -0
- package/dst/speechflow-node-t2t-format.js +80 -0
- package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
- package/dst/speechflow-node-t2t-gemma.js +213 -0
- package/dst/speechflow-node-t2t-opus.d.ts +12 -0
- package/dst/speechflow-node-t2t-opus.js +135 -0
- package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
- package/dst/speechflow-node-t2t-subtitle.js +96 -0
- package/dst/speechflow-node-trace.d.ts +11 -0
- package/dst/speechflow-node-trace.js +88 -0
- package/dst/speechflow-node-wav.d.ts +11 -0
- package/dst/speechflow-node-wav.js +170 -0
- package/dst/speechflow-node-websocket.d.ts +3 -1
- package/dst/speechflow-node-websocket.js +149 -49
- package/dst/speechflow-node-whisper-common.d.ts +34 -0
- package/dst/speechflow-node-whisper-common.js +7 -0
- package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-ggml.js +97 -0
- package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
- package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker.js +116 -0
- package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker2.js +82 -0
- package/dst/speechflow-node-whisper.d.ts +19 -0
- package/dst/speechflow-node-whisper.js +604 -0
- package/dst/speechflow-node-x2x-trace.d.ts +11 -0
- package/dst/speechflow-node-x2x-trace.js +88 -0
- package/dst/speechflow-node-xio-device.d.ts +13 -0
- package/dst/speechflow-node-xio-device.js +205 -0
- package/dst/speechflow-node-xio-file.d.ts +11 -0
- package/dst/speechflow-node-xio-file.js +176 -0
- package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
- package/dst/speechflow-node-xio-mqtt.js +181 -0
- package/dst/speechflow-node-xio-websocket.d.ts +13 -0
- package/dst/speechflow-node-xio-websocket.js +275 -0
- package/dst/speechflow-node.d.ts +25 -7
- package/dst/speechflow-node.js +74 -9
- package/dst/speechflow-utils.d.ts +23 -0
- package/dst/speechflow-utils.js +194 -0
- package/dst/speechflow.js +146 -43
- package/etc/biome.jsonc +12 -4
- package/etc/stx.conf +65 -0
- package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
- package/package.json +49 -31
- package/sample.yaml +61 -23
- package/src/lib.d.ts +6 -1
- package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
- package/src/speechflow-node-a2a-wav.ts +143 -0
- package/src/speechflow-node-a2t-deepgram.ts +199 -0
- package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
- package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
- package/src/speechflow-node-t2t-format.ts +85 -0
- package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
- package/src/speechflow-node-t2t-opus.ts +111 -0
- package/src/speechflow-node-t2t-subtitle.ts +101 -0
- package/src/speechflow-node-x2x-trace.ts +92 -0
- package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
- package/src/speechflow-node-xio-file.ts +153 -0
- package/src/speechflow-node-xio-mqtt.ts +154 -0
- package/src/speechflow-node-xio-websocket.ts +248 -0
- package/src/speechflow-node.ts +78 -13
- package/src/speechflow-utils.ts +212 -0
- package/src/speechflow.ts +150 -43
- package/etc/nps.yaml +0 -40
- package/src/speechflow-node-deepgram.ts +0 -133
- package/src/speechflow-node-elevenlabs.ts +0 -116
- package/src/speechflow-node-file.ts +0 -108
- package/src/speechflow-node-websocket.ts +0 -179
package/src/speechflow-node-xio-websocket.ts
ADDED

@@ -0,0 +1,248 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import ws from "ws"
+import ReconnWebsocket, { ErrorEvent } from "@opensumi/reconnecting-websocket"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* SpeechFlow node for Websocket networking */
+export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "websocket"
+
+    /* internal state */
+    private server: ws.WebSocketServer | null = null
+    private client: WebSocket | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            listen: { type: "string", val: "", match: /^(?:|ws:\/\/(.+?):(\d+))$/ },
+            connect: { type: "string", val: "", match: /^(?:|ws:\/\/(.+?):(\d+)(?:\/.*)?)$/ },
+            mode: { type: "string", val: "r", match: /^(?:r|w|rw)$/ },
+            type: { type: "string", val: "text", match: /^(?:audio|text)$/ }
+        })
+
+        /* sanity check usage */
+        if (this.params.listen !== "" && this.params.connect !== "")
+            throw new Error("Websocket node cannot listen and connect at the same time")
+        else if (this.params.listen === "" && this.params.connect === "")
+            throw new Error("Websocket node requires either listen or connect mode")
+
+        /* declare node input/output format */
+        if (this.params.mode === "rw") {
+            this.input = this.params.type
+            this.output = this.params.type
+        }
+        else if (this.params.mode === "r") {
+            this.input = "none"
+            this.output = this.params.type
+        }
+        else if (this.params.mode === "w") {
+            this.input = this.params.type
+            this.output = "none"
+        }
+    }
+
+    /* open node */
+    async open () {
+        if (this.params.listen !== "") {
+            /* listen locally on a Websocket port */
+            const url = new URL(this.params.listen)
+            const websockets = new Set<ws.WebSocket>()
+            const chunkQueue = new utils.SingleQueue<SpeechFlowChunk>()
+            const server = new ws.WebSocketServer({
+                host: url.hostname,
+                port: Number.parseInt(url.port),
+                path: url.pathname
+            })
+            server.on("listening", () => {
+                this.log("info", `listening on URL ${this.params.listen}`)
+            })
+            server.on("connection", (ws, request) => {
+                const peer = `${request.socket.remoteAddress}:${request.socket.remotePort}`
+                this.log("info", `connection opened on URL ${this.params.listen} by peer ${peer}`)
+                websockets.add(ws)
+                ws.on("close", () => {
+                    this.log("info", `connection closed on URL ${this.params.listen} by peer ${peer}`)
+                    websockets.delete(ws)
+                })
+                ws.on("error", (error) => {
+                    this.log("error", `error of connection on URL ${this.params.listen} for peer ${peer}: ${error.message}`)
+                })
+                ws.on("message", (data, isBinary) => {
+                    if (this.params.mode === "w") {
+                        this.log("warning", `connection on URL ${this.params.listen} by peer ${peer}: ` +
+                            "received remote data on write-only node")
+                        return
+                    }
+                    if (!isBinary) {
+                        this.log("warning", `connection on URL ${this.params.listen} by peer ${peer}: ` +
+                            "received non-binary message")
+                        return
+                    }
+                    let buffer: Buffer
+                    if (Buffer.isBuffer(data))
+                        buffer = data
+                    else if (data instanceof ArrayBuffer)
+                        buffer = Buffer.from(data)
+                    else
+                        buffer = Buffer.concat(data)
+                    const chunk = utils.streamChunkDecode(buffer)
+                    chunkQueue.write(chunk)
+                })
+            })
+            server.on("error", (error) => {
+                this.log("error", `error of some connection on URL ${this.params.listen}: ${error.message}`)
+            })
+            const type = this.params.type
+            const mode = this.params.mode
+            this.stream = new Stream.Duplex({
+                writableObjectMode: true,
+                readableObjectMode: true,
+                decodeStrings: false,
+                write (chunk: SpeechFlowChunk, encoding, callback) {
+                    if (mode === "r")
+                        callback(new Error("write operation on read-only node"))
+                    else if (chunk.type !== type)
+                        callback(new Error(`written chunk is not of ${type} type`))
+                    else if (websockets.size === 0)
+                        callback(new Error("still no Websocket connections available"))
+                    else {
+                        const data = utils.streamChunkEncode(chunk)
+                        const results = []
+                        for (const websocket of websockets.values()) {
+                            results.push(new Promise<void>((resolve, reject) => {
+                                websocket.send(data, (error) => {
+                                    if (error)
+                                        reject(error)
+                                    else
+                                        resolve()
+                                })
+                            }))
+                        }
+                        Promise.all(results).then(() => {
+                            callback()
+                        }).catch((errors: Error[]) => {
+                            const error = new Error(errors.map((e) => e.message).join("; "))
+                            callback(error)
+                        })
+                    }
+                },
+                read (size: number) {
+                    if (mode === "w")
+                        throw new Error("read operation on write-only node")
+                    chunkQueue.read().then((chunk) => {
+                        this.push(chunk, "binary")
+                    })
+                }
+            })
+        }
+        else if (this.params.connect !== "") {
+            /* connect remotely to a Websocket port */
+            this.client = new ReconnWebsocket(this.params.connect, [], {
+                WebSocket: ws,
+                WebSocketOptions: {},
+                reconnectionDelayGrowFactor: 1.3,
+                maxReconnectionDelay: 4000,
+                minReconnectionDelay: 1000,
+                connectionTimeout: 4000,
+                minUptime: 5000
+            })
+            this.client.addEventListener("open", (ev: Event) => {
+                this.log("info", `connection opened to URL ${this.params.connect}`)
+            })
+            this.client.addEventListener("close", (ev: Event) => {
+                this.log("info", `connection closed to URL ${this.params.connect}`)
+            })
+            this.client.addEventListener("error", (ev: ErrorEvent) => {
+                this.log("error", `error of connection on URL ${this.params.connect}: ${ev.error.message}`)
+            })
+            const chunkQueue = new utils.SingleQueue<SpeechFlowChunk>()
+            this.client.addEventListener("message", (ev: MessageEvent) => {
+                if (this.params.mode === "w") {
+                    this.log("warning", `connection to URL ${this.params.listen}: ` +
+                        "received remote data on write-only node")
+                    return
+                }
+                if (!(ev.data instanceof ArrayBuffer)) {
+                    this.log("warning", `connection to URL ${this.params.listen}: ` +
+                        "received non-binary message")
+                    return
+                }
+                const buffer = Buffer.from(ev.data)
+                const chunk = utils.streamChunkDecode(buffer)
+                chunkQueue.write(chunk)
+            })
+            const client = this.client
+            client.binaryType = "arraybuffer"
+            const type = this.params.type
+            const mode = this.params.mode
+            this.stream = new Stream.Duplex({
+                writableObjectMode: true,
+                readableObjectMode: true,
+                decodeStrings: false,
+                write (chunk: SpeechFlowChunk, encoding, callback) {
+                    if (mode === "r")
+                        callback(new Error("write operation on read-only node"))
+                    else if (chunk.type !== type)
+                        callback(new Error(`written chunk is not of ${type} type`))
+                    else if (!client.OPEN)
+                        callback(new Error("still no Websocket connection available"))
+                    const data = utils.streamChunkEncode(chunk)
+                    client.send(data)
+                    callback()
+                },
+                read (size: number) {
+                    if (mode === "w")
+                        throw new Error("read operation on write-only node")
+                    if (!client.OPEN)
+                        throw new Error("still no Websocket connection available")
+                    chunkQueue.read().then((chunk) => {
+                        this.push(chunk, "binary")
+                    })
+                }
+            })
+        }
+    }
+
+    /* close node */
+    async close () {
+        /* close Websocket server */
+        if (this.server !== null) {
+            await new Promise<void>((resolve, reject) => {
+                this.server!.close((error) => {
+                    if (error) reject(error)
+                    else resolve()
+                })
+            })
+            this.server = null
+        }
+
+        /* close Websocket client */
+        if (this.client !== null) {
+            this.client!.close()
+            this.client = null
+        }
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
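The node frames every chunk with the CBOR helpers `utils.streamChunkEncode()`/`utils.streamChunkDecode()` (see `package/src/speechflow-utils.ts` below), so any WebSocket peer that speaks the same framing can feed or drain a SpeechFlow graph. The following sketch of such an external peer is illustrative only: the URL/port is arbitrary and the relative import paths are assumed to resolve to the modules shown in this diff.

```ts
import ws from "ws"
import { Duration } from "luxon"
import { SpeechFlowChunk } from "./speechflow-node"
import * as utils from "./speechflow-utils"

/* connect to a SpeechFlow "websocket" node configured with listen, mode "rw" and type "text"
   (hypothetical endpoint, adjust to your own graph configuration) */
const socket = new ws.WebSocket("ws://127.0.0.1:8585")

socket.on("open", () => {
    /* send one final text chunk (timestamps are relative to the graph's time zero) */
    const chunk = new SpeechFlowChunk(
        Duration.fromMillis(0), Duration.fromMillis(0),
        "final", "text", "hello from an external peer")
    socket.send(utils.streamChunkEncode(chunk))
})

socket.on("message", (data, isBinary) => {
    /* decode CBOR-framed chunks coming back from the graph */
    if (!isBinary)
        return
    const chunk = utils.streamChunkDecode(data as Buffer)
    console.log(chunk.kind, chunk.type, chunk.payload)
})
```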
package/src/speechflow-node.ts
CHANGED

@@ -7,17 +7,44 @@
 /* standard dependencies */
 import Events from "node:events"
 import Stream from "node:stream"
+import { DateTime, Duration } from "luxon"
+
+/* the definition of a single payload chunk passed through the SpeechFlow nodes */
+export class SpeechFlowChunk {
+    constructor (
+        public timestampStart: Duration,
+        public timestampEnd: Duration,
+        public kind: "intermediate" | "final",
+        public type: "audio" | "text",
+        public payload: Buffer | string
+    ) {}
+    clone () {
+        let payload: Buffer | string
+        if (Buffer.isBuffer(this.payload))
+            payload = Buffer.from(this.payload)
+        else
+            payload = String(this.payload)
+        return new SpeechFlowChunk(
+            Duration.fromMillis(this.timestampStart.toMillis()),
+            Duration.fromMillis(this.timestampEnd.toMillis()),
+            this.kind,
+            this.type,
+            payload
+        )
+    }
+}

 /* the base class for all SpeechFlow nodes */
 export default class SpeechFlowNode extends Events.EventEmitter {
     /* general constant configuration (for reference) */
     config = {
-        audioChannels: 1,
-        audioBitDepth: 16,
-        audioLittleEndian: true,
-        audioSampleRate: 48000,
-        textEncoding: "utf8"
-    }
+        audioChannels: 1, /* audio mono channel */
+        audioBitDepth: 16 as (1 | 8 | 16 | 24 | 32), /* audio PCM 16-bit integer */
+        audioLittleEndian: true, /* audio PCM little-endian */
+        audioSampleRate: 48000, /* audio 48kHz sample rate */
+        textEncoding: "utf8" as BufferEncoding, /* UTF-8 text encoding */
+        cacheDir: "" /* directory for cache files */
+    }

     /* announced information */
     input = "none"

@@ -26,18 +53,35 @@ export default class SpeechFlowNode extends Events.EventEmitter {
     stream: Stream.Writable | Stream.Readable | Stream.Duplex | null = null
     connectionsIn = new Set<SpeechFlowNode>()
     connectionsOut = new Set<SpeechFlowNode>()
+    timeOpen: DateTime<boolean> | undefined
+    timeZero: DateTime<boolean> = DateTime.fromMillis(0)
+    timeZeroOffset: Duration<boolean> = Duration.fromMillis(0)

     /* the default constructor */
     constructor (
         public id: string,
+        private cfg: { [ id: string ]: any },
         private opts: { [ id: string ]: any },
         private args: any[]
     ) {
         super()
+        for (const key of Object.keys(cfg)) {
+            const idx = key as keyof typeof this.config
+            if (this.config[idx] !== undefined)
+                (this.config[idx] as any) = cfg[key]
+        }
+    }
+
+    /* set base/zero time for relative timestamp calculations */
+    setTimeZero (time: DateTime) {
+        this.timeZero = time
+        if (this.timeOpen === undefined)
+            this.timeOpen = this.timeZero
+        this.timeZeroOffset = this.timeZero.diff(this.timeOpen)
     }

     /* INTERNAL: utility function: create "params" attribute from constructor of sub-classes */
-    configure (spec: { [ id: string ]: { type: string, pos?: number, val?: any, match?: RegExp } }) {
+    configure (spec: { [ id: string ]: { type: string, pos?: number, val?: any, match?: RegExp | ((x: any) => boolean) } }) {
         for (const name of Object.keys(spec)) {
             if (this.opts[name] !== undefined) {
                 /* named parameter */

@@ -45,9 +89,11 @@ export default class SpeechFlowNode extends Events.EventEmitter {
                     throw new Error(`invalid type of named parameter "${name}" ` +
                         `(has to be ${spec[name].type})`)
                 if ("match" in spec[name]
-                    &&
-
-
+                    && (    (   spec[name].match instanceof RegExp
+                             && this.opts[name].match(spec[name].match) === null)
+                        ||  (   typeof spec[name].match === "function"
+                             && !spec[name].match(this.opts[name]) ) ))
+                    throw new Error(`invalid value "${this.opts[name]}" of named parameter "${name}"`)
                 this.params[name] = this.opts[name]
             }
             else if (this.opts[name] === undefined

@@ -55,14 +101,20 @@ export default class SpeechFlowNode extends Events.EventEmitter {
                 && typeof spec[name].pos === "number"
                 && spec[name].pos < this.args.length) {
                 /* positional argument */
-                if (typeof this.args[spec[name].pos
+                if (typeof this.args[spec[name].pos] !== spec[name].type)
                     throw new Error(`invalid type of positional parameter "${name}" ` +
                         `(has to be ${spec[name].type})`)
                 if ("match" in spec[name]
-                    && this.args[spec[name].pos
+                    && this.args[spec[name].pos].match(spec[name].match) === null)
                     throw new Error(`invalid value of positional parameter "${name}" ` +
                         `(has to match ${spec[name].match})`)
-
+                if ("match" in spec[name]
+                    && (    (   spec[name].match instanceof RegExp
+                             && this.args[spec[name].pos].match(spec[name].match) === null)
+                        ||  (   typeof spec[name].match === "function"
+                             && !spec[name].match(this.args[spec[name].pos]) ) ))
+                    throw new Error(`invalid value "${this.opts[name]}" of positional parameter "${name}"`)
+                this.params[name] = this.args[spec[name].pos]
             }
             else if ("val" in spec[name] && spec[name].val !== undefined)
                 /* default argument */

@@ -70,6 +122,19 @@ export default class SpeechFlowNode extends Events.EventEmitter {
             else
                 throw new Error(`required parameter "${name}" not given`)
         }
+        for (const name of Object.keys(this.opts)) {
+            if (spec[name] === undefined)
+                throw new Error(`named parameter "${name}" not known`)
+        }
+        for (let i = 0; i < this.args.length; i++) {
+            let found = false
+            for (const name of Object.keys(spec))
+                if (spec[name].pos === i)
+                    found = true
+            if (!found)
+                throw new Error(`positional parameter #${i} ("${this.args[i]}") ` +
+                    "not mappable to any known argument")
+        }
     }

     /* connect node to another one */
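Two base-class changes are worth calling out: `configure()` now accepts either a `RegExp` or a predicate function as the `match` constraint and additionally rejects unknown named or unmappable positional parameters, and the constructor copies known keys of the new `cfg` argument over the `config` defaults. A minimal sketch of how a node subclass might use this, with a hypothetical "passthru" node that is not part of this package:

```ts
import SpeechFlowNode from "./speechflow-node"

/* hypothetical text pass-through node, for illustration only */
export default class SpeechFlowNodePassthru extends SpeechFlowNode {
    /* declare official node name */
    public static name = "passthru"

    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        /* cfg keys such as textEncoding or cacheDir override this.config defaults */
        super(id, cfg, opts, args)

        this.configure({
            /* RegExp matcher, as before */
            mode:  { type: "string", val: "plain", match: /^(?:plain|upper)$/ },
            /* new: predicate function matcher */
            limit: { type: "number", val: 1000, match: (x: number) => Number.isInteger(x) && x > 0 }
        })

        this.input  = "text"
        this.output = "text"
    }
}
```

With the new trailing checks in `configure()`, an unknown named parameter or a positional parameter that maps to no declared `pos` now raises an error instead of being silently ignored.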
package/src/speechflow-utils.ts
ADDED

@@ -0,0 +1,212 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* external dependencies */
+import Stream from "node:stream"
+import { EventEmitter } from "node:events"
+import { DateTime, Duration } from "luxon"
+import CBOR from "cbor2"
+
+/* internal dependencies */
+import { SpeechFlowChunk } from "./speechflow-node"
+
+/* calculate duration of an audio buffer */
+export function audioBufferDuration (
+    buffer: Buffer,
+    sampleRate = 48000,
+    bitDepth = 16,
+    channels = 1,
+    littleEndian = true
+) {
+    if (!Buffer.isBuffer(buffer))
+        throw new Error("invalid input (Buffer expected)")
+    if (littleEndian !== true)
+        throw new Error("only Little Endian supported")
+
+    const bytesPerSample = bitDepth / 8
+    const totalSamples = buffer.length / (bytesPerSample * channels)
+    return totalSamples / sampleRate
+}
+
+/* create a Duplex/Transform stream which has
+   object-mode on Writable side and buffer/string-mode on Readable side */
+export function createTransformStreamForWritableSide () {
+    return new Stream.Transform({
+        readableObjectMode: true,
+        writableObjectMode: true,
+        decodeStrings: false,
+        transform (chunk: SpeechFlowChunk, encoding, callback) {
+            this.push(chunk.payload)
+            callback()
+        }
+    })
+}
+
+/* create a Duplex/Transform stream which has
+   object-mode on Readable side and buffer/string-mode on Writable side */
+export function createTransformStreamForReadableSide (type: "text" | "audio", getTimeZero: () => DateTime) {
+    return new Stream.Transform({
+        readableObjectMode: true,
+        writableObjectMode: true,
+        decodeStrings: false,
+        transform (chunk: Buffer | string, encoding, callback) {
+            const timeZero = getTimeZero()
+            const start = DateTime.now().diff(timeZero)
+            let end = start
+            if (type === "audio") {
+                const duration = audioBufferDuration(chunk as Buffer)
+                end = start.plus(duration * 1000)
+            }
+            const obj = new SpeechFlowChunk(start, end, "final", type, chunk)
+            this.push(obj)
+            callback()
+        }
+    })
+}
+
+/* ensure a chunk is of a certain type and format */
+export function ensureStreamChunk (type: "audio" | "text", chunk: SpeechFlowChunk | Buffer | string) {
+    if (chunk instanceof SpeechFlowChunk) {
+        if (chunk.type !== type)
+            throw new Error(`invalid payload chunk (expected ${type} type, received ${chunk.type} type)`)
+    }
+    else {
+        if (type === "text" && Buffer.isBuffer(chunk))
+            chunk = chunk.toString("utf8")
+        else if (type === "audio" && !Buffer.isBuffer(chunk))
+            chunk = Buffer.from(chunk)
+    }
+    return chunk
+}
+
+/* type of a serialized SpeechFlow chunk */
+type SpeechFlowChunkSerialized = {
+    timestampStart: number,
+    timestampEnd: number,
+    kind: string,
+    type: string,
+    payload: Uint8Array
+}
+
+/* encode/serialize chunk of data */
+export function streamChunkEncode (chunk: SpeechFlowChunk) {
+    let payload: Uint8Array
+    if (Buffer.isBuffer(chunk.payload))
+        payload = new Uint8Array(chunk.payload)
+    else {
+        const encoder = new TextEncoder()
+        payload = encoder.encode(chunk.payload)
+    }
+    const data = {
+        timestampStart: chunk.timestampStart.toMillis(),
+        timestampEnd: chunk.timestampEnd.toMillis(),
+        kind: chunk.kind,
+        type: chunk.type,
+        payload
+    } satisfies SpeechFlowChunkSerialized
+    const _data = CBOR.encode(data)
+    return _data
+}
+
+/* decode/unserialize chunk of data */
+export function streamChunkDecode (_data: Uint8Array) {
+    let data: SpeechFlowChunkSerialized
+    try {
+        data = CBOR.decode<SpeechFlowChunkSerialized>(_data)
+    }
+    catch (err: any) {
+        throw new Error(`CBOR decoding failed: ${err}`)
+    }
+    let payload: Buffer | string
+    if (data.type === "audio")
+        payload = Buffer.from(data.payload)
+    else
+        payload = (new TextDecoder()).decode(data.payload)
+    const chunk = new SpeechFlowChunk(
+        Duration.fromMillis(data.timestampStart),
+        Duration.fromMillis(data.timestampEnd),
+        data.kind as "intermediate" | "final",
+        data.type as "audio" | "text",
+        payload
+    )
+    return chunk
+}
+
+/* helper class for single item queue */
+export class SingleQueue<T> extends EventEmitter {
+    private queue = new Array<T>()
+    write (item: T) {
+        this.queue.unshift(item)
+        this.emit("dequeue")
+    }
+    read () {
+        return new Promise<T>((resolve, reject) => {
+            const consume = () => {
+                if (this.queue.length > 0)
+                    return this.queue.pop()!
+                else
+                    return null
+            }
+            let item = consume()
+            if (item !== null)
+                resolve(item)
+            else {
+                const tryToConsume = () => {
+                    item = consume()
+                    if (item !== null)
+                        resolve(item)
+                    else
+                        this.once("dequeue", tryToConsume)
+                }
+                this.once("dequeue", tryToConsume)
+            }
+        })
+    }
+}
+
+/* helper class for double-item queue */
+export class DoubleQueue<T0, T1> extends EventEmitter {
+    private queue0 = new Array<T0>()
+    private queue1 = new Array<T1>()
+    private notify () {
+        if (this.queue0.length > 0 && this.queue1.length > 0)
+            this.emit("dequeue")
+    }
+    write0 (item: T0) {
+        this.queue0.unshift(item)
+        this.notify()
+    }
+    write1 (item: T1) {
+        this.queue1.unshift(item)
+        this.notify()
+    }
+    read () {
+        return new Promise<[ T0, T1 ]>((resolve, reject) => {
+            const consume = (): [ T0, T1 ] | null => {
+                if (this.queue0.length > 0 && this.queue1.length > 0) {
+                    const item0 = this.queue0.pop() as T0
+                    const item1 = this.queue1.pop() as T1
+                    return [ item0, item1 ]
+                }
+                else
+                    return null
+            }
+            let items = consume()
+            if (items !== null)
+                resolve(items)
+            else {
+                const tryToConsume = () => {
+                    items = consume()
+                    if (items !== null)
+                        resolve(items)
+                    else
+                        this.once("dequeue", tryToConsume)
+                }
+                this.once("dequeue", tryToConsume)
+            }
+        })
+    }
+}