speechflow 1.7.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +387 -119
  3. package/etc/claude.md +5 -5
  4. package/etc/speechflow.yaml +2 -2
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
  7. package/speechflow-cli/dst/speechflow-main-graph.js +28 -5
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +24 -4
  10. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +17 -0
  12. package/speechflow-cli/dst/speechflow-node-a2t-google.js +320 -0
  13. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
  14. package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
  15. package/speechflow-cli/dst/speechflow-node-t2a-google.js +218 -0
  16. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
  17. package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
  18. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +195 -0
  19. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
  20. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
  21. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +608 -0
  22. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
  23. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  24. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
  25. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +159 -0
  26. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
  27. package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
  28. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
  29. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
  30. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
  31. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
  32. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
  33. package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
  34. package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +47 -99
  35. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
  36. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +3 -6
  37. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
  39. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
  40. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
  41. package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
  42. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
  43. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
  45. package/speechflow-cli/dst/speechflow-node-xio-exec.js +223 -0
  46. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
  47. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
  48. package/speechflow-cli/dst/speechflow-node-xio-file.js +79 -66
  49. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  50. package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
  51. package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
  52. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
  53. package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
  54. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +500 -0
  55. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
  56. package/speechflow-cli/dst/speechflow-util-audio.js +4 -5
  57. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
  59. package/speechflow-cli/dst/speechflow-util-error.js +5 -0
  60. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
  62. package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
  63. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
  64. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  65. package/speechflow-cli/dst/speechflow-util.js +1 -0
  66. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  67. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  68. package/speechflow-cli/package.json +34 -17
  69. package/speechflow-cli/src/lib.d.ts +5 -0
  70. package/speechflow-cli/src/speechflow-main-graph.ts +31 -5
  71. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +24 -4
  72. package/speechflow-cli/src/speechflow-node-a2t-google.ts +322 -0
  73. package/speechflow-cli/src/speechflow-node-t2a-google.ts +206 -0
  74. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +179 -0
  75. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +701 -0
  76. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +2 -1
  77. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +136 -0
  78. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
  79. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
  80. package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts} +48 -107
  81. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +3 -6
  82. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
  83. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
  84. package/speechflow-cli/src/speechflow-node-xio-exec.ts +210 -0
  85. package/speechflow-cli/src/speechflow-node-xio-file.ts +92 -79
  86. package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
  87. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +533 -0
  88. package/speechflow-cli/src/speechflow-util-audio.ts +5 -5
  89. package/speechflow-cli/src/speechflow-util-error.ts +9 -0
  90. package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
  91. package/speechflow-cli/src/speechflow-util.ts +1 -0
  92. package/speechflow-ui-db/package.json +9 -9
  93. package/speechflow-ui-st/package.json +9 -9
  94. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
  95. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
  96. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
  97. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
  98. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
  99. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
@@ -0,0 +1,533 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+ import http from "node:http"
10
+ import crypto from "node:crypto"
11
+
12
+ /* external dependencies */
13
+ import { DateTime } from "luxon"
14
+ import * as arktype from "arktype"
15
+ import { OpusEncoder } from "@discordjs/opus"
16
+ import {
17
+ RTCPeerConnection, MediaStreamTrack,
18
+ RtpPacket, RtpHeader,
19
+ useAbsSendTime, useSdesMid
20
+ } from "werift"
21
+
22
+ /* internal dependencies */
23
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
24
+ import * as util from "./speechflow-util"
25
+
26
/* WebRTC peer connection state (one entry per WHIP publisher or WHEP viewer) */
interface WebRTCConnection {
    pc: RTCPeerConnection                              /* underlying werift peer connection */
    track: MediaStreamTrack | null                     /* outbound audio track (WHEP viewers only, null for WHIP) */
    resourceId: string                                 /* unique id, also the resource path segment returned in "Location" */
    subscription: { unSubscribe: () => void } | null   /* handle of the connectionStateChange subscription */
}
33
+
34
+ /* SpeechFlow node for WebRTC networking (WHIP/WHEP) */
35
+ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
36
+ /* declare official node name */
37
+ public static name = "xio-webrtc"
38
+
39
+ /* internal state */
40
+ private peerConnections = new Map<string, WebRTCConnection>()
41
+ private httpServer: http.Server | null = null
42
+ private chunkQueue: util.SingleQueue<SpeechFlowChunk> | null = null
43
+ private opusEncoder: OpusEncoder | null = null
44
+ private opusDecoder: OpusEncoder | null = null
45
+ private pcmBuffer = Buffer.alloc(0)
46
+ private rtpSequence = 0
47
+ private rtpTimestamp = 0
48
+ private rtpSSRC = 0
49
+ private maxConnections = 10
50
+
51
+ /* Opus codec configuration: 48kHz, mono, 16-bit */
52
+ private readonly OPUS_SAMPLE_RATE = 48000
53
+ private readonly OPUS_CHANNELS = 1
54
+ private readonly OPUS_BIT_DEPTH = 16
55
+ private readonly OPUS_FRAME_SIZE = 960 /* 20ms at 48kHz = 960 samples */
56
+ private readonly OPUS_FRAME_BYTES = 960 * 2 /* 16-bit = 2 bytes per sample */
57
+
58
+ /* construct node */
59
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
60
+ super(id, cfg, opts, args)
61
+
62
+ /* declare node configuration parameters */
63
+ this.configure({
64
+ listen: { type: "string", pos: 0, val: "8085", match: /^(?:\d+|.+?:\d+)$/ },
65
+ path: { type: "string", pos: 1, val: "/webrtc", match: /^\/.+$/ },
66
+ mode: { type: "string", pos: 2, val: "r", match: /^(?:r|w)$/ },
67
+ iceServers: { type: "string", pos: 3, val: "", match: /^.*$/ }
68
+ })
69
+
70
+ /* declare node input/output format */
71
+ if (this.params.mode === "r") {
72
+ this.input = "none"
73
+ this.output = "audio"
74
+ }
75
+ else if (this.params.mode === "w") {
76
+ this.input = "audio"
77
+ this.output = "none"
78
+ }
79
+ }
80
+
81
+ /* parse address:port string */
82
+ private parseAddress (addr: string, defaultPort: number): { host: string, port: number } {
83
+ if (addr.match(/^\d+$/))
84
+ return { host: "0.0.0.0", port: Number.parseInt(addr, 10) }
85
+ const m = addr.match(/^(.+?):(\d+)$/)
86
+ if (m === null)
87
+ return { host: addr, port: defaultPort }
88
+ return { host: m[1], port: Number.parseInt(m[2], 10) }
89
+ }
90
+
91
+ /* read HTTP request body */
92
+ private readRequestBody (req: http.IncomingMessage): Promise<string> {
93
+ return new Promise((resolve, reject) => {
94
+ const chunks: Buffer[] = []
95
+ const maxBodySize = 1024 * 1024 /* 1 MB limit for SDP */
96
+ let totalSize = 0
97
+ const onData = (chunk: Buffer) => {
98
+ totalSize += chunk.length
99
+ if (totalSize > maxBodySize) {
100
+ req.removeListener("data", onData)
101
+ req.removeListener("end", onEnd)
102
+ req.removeListener("error", onError)
103
+ req.destroy()
104
+ reject(new Error("request body too large"))
105
+ return
106
+ }
107
+ chunks.push(chunk)
108
+ }
109
+ const onEnd = () =>
110
+ resolve(Buffer.concat(chunks).toString("utf8"))
111
+ const onError = (err: Error) =>
112
+ reject(err)
113
+ req.on("data", onData)
114
+ req.on("end", onEnd)
115
+ req.on("error", onError)
116
+ })
117
+ }
118
+
119
+ /* decode Opus packet to PCM and enqueue as SpeechFlowChunk */
120
+ private decodeOpusToChunk (opusPacket: Buffer) {
121
+ if (this.opusDecoder === null || this.chunkQueue === null)
122
+ return
123
+ if (this.params.mode === "w")
124
+ return
125
+ try {
126
+ /* decode Opus to PCM (16-bit signed, little-endian, 48kHz) */
127
+ const pcmBuffer = this.opusDecoder.decode(opusPacket)
128
+
129
+ /* create chunk with timing information (use Opus codec rates, not config) */
130
+ const now = DateTime.now()
131
+ const start = now.diff(this.timeZero)
132
+ const duration = util.audioBufferDuration(pcmBuffer,
133
+ this.OPUS_SAMPLE_RATE, this.OPUS_BIT_DEPTH, this.OPUS_CHANNELS)
134
+ const end = start.plus(duration * 1000)
135
+ const chunk = new SpeechFlowChunk(start, end, "final", "audio", pcmBuffer)
136
+ this.chunkQueue.write(chunk)
137
+ }
138
+ catch (err: unknown) {
139
+ this.log("warning", `Opus decode error: ${util.ensureError(err).message}`)
140
+ }
141
+ }
142
+
143
+ /* buffer PCM and encode to Opus frames, send to all viewers */
144
+ private bufferAndEncode (chunk: SpeechFlowChunk) {
145
+ if (this.opusEncoder === null)
146
+ return
147
+ const pcm = chunk.payload as Buffer
148
+ this.pcmBuffer = Buffer.concat([ this.pcmBuffer, pcm ])
149
+
150
+ /* prevent unbounded buffer growth */
151
+ const maxBufferSize = this.OPUS_FRAME_BYTES * 10
152
+ if (this.pcmBuffer.length > maxBufferSize) {
153
+ this.log("warning", `PCM buffer overflow (${this.pcmBuffer.length} bytes), discarding excess`)
154
+ this.pcmBuffer = this.pcmBuffer.subarray(this.pcmBuffer.length - maxBufferSize)
155
+ }
156
+
157
+ while (this.pcmBuffer.length >= this.OPUS_FRAME_BYTES) {
158
+ const frame = this.pcmBuffer.subarray(0, this.OPUS_FRAME_BYTES)
159
+ this.pcmBuffer = this.pcmBuffer.subarray(this.OPUS_FRAME_BYTES)
160
+ try {
161
+ /* encode PCM to Opus */
162
+ const opusPacket = this.opusEncoder.encode(frame)
163
+ this.sendOpusToAllViewers(opusPacket)
164
+ }
165
+ catch (err: unknown) {
166
+ this.log("warning", `Opus encode error: ${util.ensureError(err).message}`)
167
+ }
168
+ }
169
+ }
170
+
171
+ /* send Opus packet to all connected WHEP viewers */
172
+ private sendOpusToAllViewers (opusPacket: Buffer) {
173
+ /* build RTP header */
174
+ const rtpHeader = new RtpHeader({
175
+ version: 2,
176
+ padding: false,
177
+ paddingSize: 0,
178
+ extension: false,
179
+ marker: true,
180
+ payloadType: 111, /* Opus payload type */
181
+ sequenceNumber: this.rtpSequence++ & 0xFFFF,
182
+ timestamp: this.rtpTimestamp,
183
+ ssrc: this.rtpSSRC,
184
+ csrc: [],
185
+ extensions: []
186
+ })
187
+
188
+ /* build RTP packet */
189
+ const rtpPacket = new RtpPacket(rtpHeader, opusPacket)
190
+
191
+ /* advance timestamp by frame duration */
192
+ this.rtpTimestamp = (this.rtpTimestamp + this.OPUS_FRAME_SIZE) >>> 0
193
+
194
+ /* send to all connected viewers (snapshot to avoid concurrent modification) */
195
+ const connections = Array.from(this.peerConnections.values())
196
+ for (const conn of connections) {
197
+ if (conn.track !== null) {
198
+ try {
199
+ conn.track.writeRtp(rtpPacket)
200
+ }
201
+ catch (err: unknown) {
202
+ this.log("warning", `failed to send RTP to WebRTC peer: ${util.ensureError(err).message}`)
203
+ }
204
+ }
205
+ }
206
+ }
207
+
208
+ /* parse ICE servers configuration */
209
+ private parseIceServers (): { urls: string }[] {
210
+ if (this.params.iceServers === "")
211
+ return []
212
+ let servers: { urls: string }[] = []
213
+ try {
214
+ servers = util.importObject("WebRTC ICE servers",
215
+ this.params.iceServers,
216
+ arktype.type({ urls: "string" }).array())
217
+ }
218
+ catch (err: unknown) {
219
+ this.log("warning", `invalid iceServers JSON: ${util.ensureError(err).message}`)
220
+ servers = []
221
+ }
222
+ return servers
223
+ }
224
+
225
+ /* create a new RTCPeerConnection with standard configuration */
226
+ private createPeerConnection (resourceId: string): { pc: RTCPeerConnection, subscription: { unSubscribe: () => void } } {
227
+ const pc = new RTCPeerConnection({
228
+ iceServers: this.parseIceServers(),
229
+ headerExtensions: {
230
+ audio: [ useSdesMid(), useAbsSendTime() ]
231
+ }
232
+ })
233
+ const subscription = pc.connectionStateChange.subscribe((state: string) => {
234
+ this.log("info", `WebRTC connection ${resourceId}: ${state}`)
235
+ if (state === "failed" || state === "closed" || state === "disconnected")
236
+ setImmediate(() => {
237
+ if (this.peerConnections.has(resourceId))
238
+ this.cleanupConnection(resourceId)
239
+ })
240
+ })
241
+ return { pc, subscription }
242
+ }
243
+
244
+ /* safely close a peer connection */
245
+ private closePeerConnection (pc: RTCPeerConnection) {
246
+ util.shield(() => { pc.close() })
247
+ }
248
+
249
+ /* perform SDP negotiation and establish connection */
250
+ private async performSDPNegotiation (
251
+ res: http.ServerResponse,
252
+ offer: string,
253
+ protocol: "WHIP" | "WHEP",
254
+ setupFn: (pc: RTCPeerConnection, resourceId: string) => MediaStreamTrack | null
255
+ ) {
256
+ /* enforce connection limit */
257
+ if (this.peerConnections.size >= this.maxConnections) {
258
+ res.writeHead(503, { "Content-Type": "text/plain" })
259
+ res.end("Service Unavailable: Maximum connections reached")
260
+ return
261
+ }
262
+
263
+ /* create peer connection */
264
+ const resourceId = crypto.randomUUID()
265
+ const { pc, subscription } = this.createPeerConnection(resourceId)
266
+
267
+ /* protocol-specific setup */
268
+ const track = setupFn(pc, resourceId)
269
+
270
+ /* complete SDP offer/answer exchange and establish connection */
271
+ try {
272
+ /* set remote description (offer from client) */
273
+ await pc.setRemoteDescription({ type: "offer", sdp: offer })
274
+
275
+ /* create and set local description (answer) */
276
+ const answer = await pc.createAnswer()
277
+ await pc.setLocalDescription(answer)
278
+
279
+ /* store connection */
280
+ this.peerConnections.set(resourceId, { pc, track, resourceId, subscription })
281
+
282
+ /* return SDP answer */
283
+ if (pc.localDescription === null || pc.localDescription === undefined)
284
+ throw new Error("local description is missing")
285
+ res.writeHead(201, {
286
+ "Content-Type": "application/sdp",
287
+ "Location": `${this.params.path}/${resourceId}`
288
+ })
289
+ res.end(pc.localDescription.sdp)
290
+ this.log("info", `${protocol} connection established: ${resourceId}`)
291
+ }
292
+ catch (err: unknown) {
293
+ util.shield(() => { subscription.unSubscribe() })
294
+ this.closePeerConnection(pc)
295
+ this.log("error", `${protocol} negotiation failed: ${util.ensureError(err).message}`)
296
+ res.writeHead(500, { "Content-Type": "text/plain" })
297
+ res.end("Internal Server Error")
298
+ }
299
+ }
300
+
301
+ /* handle WHIP POST (receiving audio from publisher) */
302
+ private async handleWHIP (res: http.ServerResponse, offer: string) {
303
+ await this.performSDPNegotiation(res, offer, "WHIP", (pc, _resourceId) => {
304
+ /* handle incoming audio track */
305
+ pc.ontrack = (event: { track: MediaStreamTrack }) => {
306
+ const track = event.track
307
+ if (track.kind === "audio") {
308
+ this.log("info", `WebRTC audio track received from publisher`)
309
+
310
+ /* subscribe to incoming RTP packets */
311
+ track.onReceiveRtp.subscribe((rtpPacket: RtpPacket) => {
312
+ this.decodeOpusToChunk(rtpPacket.payload)
313
+ })
314
+ }
315
+ }
316
+ return null
317
+ })
318
+ }
319
+
320
+ /* handle WHEP POST (sending audio to viewer) */
321
+ private async handleWHEP (res: http.ServerResponse, offer: string) {
322
+ await this.performSDPNegotiation(res, offer, "WHEP", (pc, _resourceId) => {
323
+ /* create outbound audio track */
324
+ const outboundTrack = new MediaStreamTrack({ kind: "audio" })
325
+ pc.addTrack(outboundTrack)
326
+ return outboundTrack
327
+ })
328
+ }
329
+
330
+ /* handle DELETE (connection teardown) */
331
+ private handleDELETE (res: http.ServerResponse, resourceId: string) {
332
+ if (this.peerConnections.has(resourceId)) {
333
+ this.cleanupConnection(resourceId)
334
+ res.writeHead(200)
335
+ res.end()
336
+ this.log("info", `WebRTC connection terminated: ${resourceId}`)
337
+ }
338
+ else {
339
+ res.writeHead(404, { "Content-Type": "text/plain" })
340
+ res.end("Not Found")
341
+ }
342
+ }
343
+
344
+ /* cleanup a peer connection */
345
+ private cleanupConnection (resourceId: string) {
346
+ const conn = this.peerConnections.get(resourceId)
347
+ if (conn === undefined)
348
+ return
349
+ this.peerConnections.delete(resourceId)
350
+ if (conn.subscription !== null)
351
+ util.shield(() => { conn.subscription?.unSubscribe() })
352
+ if (conn.track !== null)
353
+ util.shield(() => { conn.track?.stop() })
354
+ this.closePeerConnection(conn.pc)
355
+ }
356
+
357
+ /* open node */
358
+ async open () {
359
+ /* setup Opus codec */
360
+ this.opusEncoder = new OpusEncoder(this.OPUS_SAMPLE_RATE, this.OPUS_CHANNELS)
361
+ this.opusDecoder = new OpusEncoder(this.OPUS_SAMPLE_RATE, this.OPUS_CHANNELS)
362
+
363
+ /* initialize RTP state */
364
+ this.rtpSequence = Math.floor(Math.random() * 0x10000)
365
+ this.rtpTimestamp = Math.floor(Math.random() * 0x100000000) >>> 0
366
+ this.rtpSSRC = Math.floor(Math.random() * 0x100000000) >>> 0
367
+
368
+ /* setup chunk queue for incoming audio */
369
+ this.chunkQueue = new util.SingleQueue<SpeechFlowChunk>()
370
+
371
+ /* parse listen address */
372
+ const listen = this.parseAddress(this.params.listen, 8085)
373
+
374
+ /* setup HTTP server for WHIP/WHEP signaling */
375
+ const self = this
376
+ this.httpServer = http.createServer(async (req, res) => {
377
+ /* determine URL */
378
+ if (req.url === undefined) {
379
+ res.writeHead(400, { "Content-Type": "text/plain" })
380
+ res.end("Bad Request")
381
+ return
382
+ }
383
+ const host = req.headers.host?.replace(/[^a-zA-Z0-9:.\-_]/g, "") ?? "localhost"
384
+ const url = new URL(req.url, `http://${host}`)
385
+ const pathMatch = url.pathname === self.params.path
386
+ const resourceMatch = url.pathname.startsWith(self.params.path + "/")
387
+
388
+ /* CORS headers for browser clients */
389
+ res.setHeader("Access-Control-Allow-Origin", "*")
390
+ res.setHeader("Access-Control-Allow-Methods", "POST, DELETE, OPTIONS")
391
+ res.setHeader("Access-Control-Allow-Headers", "Content-Type")
392
+ res.setHeader("Access-Control-Expose-Headers", "Location")
393
+
394
+ /* handle CORS preflight */
395
+ if (req.method === "OPTIONS") {
396
+ res.writeHead(204)
397
+ res.end()
398
+ return
399
+ }
400
+
401
+ /* handle requests... */
402
+ if (req.method === "POST" && pathMatch) {
403
+ /* handle WHIP/WHEP POST */
404
+ const body = await self.readRequestBody(req)
405
+
406
+ /* sanity check content type */
407
+ const contentType = req.headers["content-type"]
408
+ if (contentType !== "application/sdp") {
409
+ res.writeHead(415, { "Content-Type": "text/plain" })
410
+ res.end("Unsupported Media Type")
411
+ return
412
+ }
413
+
414
+ /* determine if WHIP (receiving) or WHEP (sending) based on SDP content */
415
+ const hasSendonly = /\ba=sendonly\b/m.test(body)
416
+ const hasSendrecv = /\ba=sendrecv\b/m.test(body)
417
+ const hasRecvonly = /\ba=recvonly\b/m.test(body)
418
+ const isPublisher = hasSendonly || hasSendrecv
419
+ const isViewer = hasRecvonly
420
+
421
+ if (self.params.mode === "r" && isPublisher)
422
+ /* in read mode, accept WHIP publishers */
423
+ await self.handleWHIP(res, body)
424
+ else if (self.params.mode === "w" && isViewer)
425
+ /* in write mode, accept WHEP viewers */
426
+ await self.handleWHEP(res, body)
427
+ else {
428
+ res.writeHead(403, { "Content-Type": "text/plain" })
429
+ res.end("Forbidden")
430
+ }
431
+ }
432
+ else if (req.method === "DELETE" && resourceMatch) {
433
+ /* handle DELETE for connection teardown */
434
+ const resourceId = url.pathname.substring(self.params.path.length + 1)
435
+ self.handleDELETE(res, resourceId)
436
+ }
437
+ else {
438
+ /* handle unknown requests */
439
+ res.writeHead(404, { "Content-Type": "text/plain" })
440
+ res.end("Not Found")
441
+ }
442
+ })
443
+
444
+ /* start HTTP server */
445
+ await new Promise<void>((resolve) => {
446
+ this.httpServer!.listen(listen.port, listen.host, () => {
447
+ const mode = this.params.mode === "r" ? "WHIP" : "WHEP"
448
+ this.log("info", `WebRTC ${mode} server listening on http://${listen.host}:${listen.port}${this.params.path}`)
449
+ resolve()
450
+ })
451
+ })
452
+
453
+ /* create duplex stream */
454
+ const reads = new util.PromiseSet<void>()
455
+ this.stream = new Stream.Duplex({
456
+ writableObjectMode: true,
457
+ readableObjectMode: true,
458
+ decodeStrings: false,
459
+ highWaterMark: 1,
460
+ write (chunk: SpeechFlowChunk, encoding, callback) {
461
+ if (self.params.mode === "r") {
462
+ callback(new Error("write operation on read mode node"))
463
+ return
464
+ }
465
+ if (chunk.type !== "audio") {
466
+ callback(new Error("WebRTC node only supports audio type"))
467
+ return
468
+ }
469
+ if (self.peerConnections.size === 0) {
470
+ /* silently drop if no viewers connected */
471
+ callback()
472
+ return
473
+ }
474
+ self.bufferAndEncode(chunk)
475
+ callback()
476
+ },
477
+ async final (callback) {
478
+ await reads.awaitAll()
479
+ callback()
480
+ },
481
+ read (size: number) {
482
+ if (self.params.mode === "w") {
483
+ self.log("error", "read operation on write mode node")
484
+ this.push(null)
485
+ return
486
+ }
487
+ reads.add(self.chunkQueue!.read().then((chunk) => {
488
+ this.push(chunk, "binary")
489
+ }).catch((err: Error) => {
490
+ self.log("warning", `read on chunk queue operation failed: ${err}`)
491
+ this.push(null)
492
+ }))
493
+ }
494
+ })
495
+ }
496
+
497
+ /* close node */
498
+ async close () {
499
+ /* close all peer connections */
500
+ for (const resourceId of Array.from(this.peerConnections.keys()))
501
+ this.cleanupConnection(resourceId)
502
+
503
+ /* close HTTP server */
504
+ if (this.httpServer !== null) {
505
+ await new Promise<void>((resolve, reject) => {
506
+ this.httpServer!.close((err) => {
507
+ if (err) reject(err)
508
+ else resolve()
509
+ })
510
+ }).catch((err: Error) => {
511
+ this.log("warning", `failed to close HTTP server: ${err.message}`)
512
+ })
513
+ this.httpServer = null
514
+ }
515
+
516
+ /* drain and clear chunk queue */
517
+ if (this.chunkQueue !== null) {
518
+ this.chunkQueue.drain()
519
+ this.chunkQueue = null
520
+ }
521
+
522
+ /* cleanup codec instances */
523
+ this.opusEncoder = null
524
+ this.opusDecoder = null
525
+ this.pcmBuffer = Buffer.alloc(0)
526
+
527
+ /* shutdown stream */
528
+ if (this.stream !== null) {
529
+ await util.destroyStream(this.stream)
530
+ this.stream = null
531
+ }
532
+ }
533
+ }
@@ -10,6 +10,9 @@ import path from "node:path"
10
10
  /* external dependencies */
11
11
  import { AudioContext, AudioWorkletNode } from "node-web-audio-api"
12
12
 
13
+ /* internal dependencies */
14
+ import { shield } from "./speechflow-util-error"
15
+
13
16
  /* calculate duration of an audio buffer */
14
17
  export function audioBufferDuration (
15
18
  buffer: Buffer,
@@ -280,16 +283,13 @@ export class WebAudio {
280
283
 
281
284
  public async destroy (): Promise<void> {
282
285
  /* reject all pending promises */
283
- try {
286
+ shield(() => {
284
287
  this.pendingPromises.forEach(({ reject, timeout }) => {
285
288
  clearTimeout(timeout)
286
289
  reject(new Error("WebAudio destroyed"))
287
290
  })
288
291
  this.pendingPromises.clear()
289
- }
290
- catch (_err) {
291
- /* ignored -- cleanup during shutdown */
292
- }
292
+ })
293
293
 
294
294
  /* disconnect nodes */
295
295
  if (this.sourceNode !== null) {
@@ -188,3 +188,12 @@ export function runner<T> (
188
188
  return run(() => action(...args), oncatch, onfinally)
189
189
  }
190
190
  }
191
+
192
/* shield cleanup operation, ignoring errors
   NOTE(review): the wrapper "() => { op() }" discards the return value of
   "op()", so when "op" returns a Promise (the async half of the T
   constraint) any rejection is NOT routed through the ignore-handler
   below -- confirm that only synchronous operations are shielded, or
   forward the promise to "run" */
export function shield<T extends (void | Promise<void>)> (op: () => T) {
    return run(
        "shielded operation",
        () => { op() },
        (_err) => { /* ignore error */ }
    )
}