speechflow 1.7.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. package/CHANGELOG.md +24 -0
  2. package/README.md +388 -120
  3. package/etc/claude.md +5 -5
  4. package/etc/speechflow.yaml +2 -2
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  7. package/speechflow-cli/dst/speechflow-main-cli.js +1 -0
  8. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
  10. package/speechflow-cli/dst/speechflow-main-graph.js +30 -9
  11. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  12. package/speechflow-cli/dst/speechflow-main-nodes.js +1 -0
  13. package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -1
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -0
  15. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  16. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +7 -9
  17. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -0
  19. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  20. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
  21. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -0
  23. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -1
  25. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +11 -9
  28. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +1 -0
  30. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
  31. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +4 -2
  33. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +19 -22
  35. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +31 -4
  37. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
  39. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +2 -11
  40. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +16 -0
  42. package/speechflow-cli/dst/speechflow-node-a2t-google.js +314 -0
  43. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -1
  45. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +27 -7
  48. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  50. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -3
  51. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  52. package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
  53. package/speechflow-cli/dst/speechflow-node-t2a-google.js +215 -0
  54. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
  55. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
  56. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +27 -6
  57. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
  59. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +192 -0
  60. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
  61. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
  62. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +619 -0
  63. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
  64. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +0 -2
  65. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  66. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  68. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
  69. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +161 -0
  70. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
  71. package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
  72. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
  73. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
  74. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
  75. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
  76. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
  77. package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
  78. package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +48 -100
  79. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
  80. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +8 -8
  81. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
  83. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
  84. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
  85. package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
  86. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
  87. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
  88. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -0
  89. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  90. package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
  91. package/speechflow-cli/dst/speechflow-node-xio-exec.js +224 -0
  92. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
  93. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
  94. package/speechflow-cli/dst/speechflow-node-xio-file.js +78 -67
  95. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  97. package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
  98. package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
  99. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
  100. package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
  101. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +502 -0
  102. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
  103. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +9 -9
  104. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  105. package/speechflow-cli/dst/speechflow-util-audio.js +8 -5
  106. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  107. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
  108. package/speechflow-cli/dst/speechflow-util-error.js +5 -0
  109. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  110. package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
  111. package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
  112. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
  113. package/speechflow-cli/dst/speechflow-util-queue.js +2 -1
  114. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  115. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  116. package/speechflow-cli/dst/speechflow-util.js +2 -0
  117. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  118. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  119. package/speechflow-cli/package.json +35 -18
  120. package/speechflow-cli/src/lib.d.ts +5 -0
  121. package/speechflow-cli/src/speechflow-main-api.ts +16 -16
  122. package/speechflow-cli/src/speechflow-main-cli.ts +1 -0
  123. package/speechflow-cli/src/speechflow-main-graph.ts +38 -14
  124. package/speechflow-cli/src/speechflow-main-nodes.ts +1 -0
  125. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -0
  126. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -10
  127. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -0
  128. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +9 -10
  129. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -0
  130. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -3
  131. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
  132. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +11 -9
  133. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +1 -0
  134. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
  135. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +5 -3
  136. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +20 -23
  137. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +31 -4
  138. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -18
  139. package/speechflow-cli/src/speechflow-node-a2t-google.ts +315 -0
  140. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +12 -7
  141. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +32 -10
  142. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -4
  143. package/speechflow-cli/src/speechflow-node-t2a-google.ts +203 -0
  144. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +33 -10
  145. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +176 -0
  146. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +710 -0
  147. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +3 -4
  148. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
  149. package/speechflow-cli/src/speechflow-node-t2t-google.ts +1 -1
  150. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +137 -0
  151. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
  152. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
  153. package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +188 -0
  154. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +8 -8
  155. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
  156. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
  157. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -0
  158. package/speechflow-cli/src/speechflow-node-xio-exec.ts +211 -0
  159. package/speechflow-cli/src/speechflow-node-xio-file.ts +91 -80
  160. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
  161. package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
  162. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +535 -0
  163. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
  164. package/speechflow-cli/src/speechflow-util-audio.ts +10 -5
  165. package/speechflow-cli/src/speechflow-util-error.ts +9 -0
  166. package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
  167. package/speechflow-cli/src/speechflow-util-queue.ts +3 -3
  168. package/speechflow-cli/src/speechflow-util.ts +2 -0
  169. package/speechflow-ui-db/package.json +9 -9
  170. package/speechflow-ui-st/package.json +9 -9
  171. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
  172. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
  173. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
  174. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
  175. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
  176. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -247
  177. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
@@ -0,0 +1,535 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import Stream from "node:stream"
+ import http from "node:http"
+ import crypto from "node:crypto"
+
+ /* external dependencies */
+ import { DateTime } from "luxon"
+ import * as arktype from "arktype"
+ import { OpusEncoder } from "@discordjs/opus"
+ import {
+     RTCPeerConnection, MediaStreamTrack,
+     RtpPacket, RtpHeader,
+     useAbsSendTime, useSdesMid
+ } from "werift"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+ import * as util from "./speechflow-util"
+
+ /* WebRTC peer connection state */
+ interface WebRTCConnection {
+     pc: RTCPeerConnection
+     track: MediaStreamTrack | null
+     resourceId: string
+     subscription: { unSubscribe: () => void } | null
+ }
+
+ /* SpeechFlow node for WebRTC networking (WHIP/WHEP) */
+ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
+     /* declare official node name */
+     public static name = "xio-webrtc"
+
+     /* internal state */
+     private peerConnections = new Map<string, WebRTCConnection>()
+     private httpServer: http.Server | null = null
+     private chunkQueue: util.SingleQueue<SpeechFlowChunk> | null = null
+     private opusEncoder: OpusEncoder | null = null
+     private opusDecoder: OpusEncoder | null = null
+     private pcmBuffer = Buffer.alloc(0)
+     private rtpSequence = 0
+     private rtpTimestamp = 0
+     private rtpSSRC = 0
+     private maxConnections = 10
+
+     /* Opus codec configuration: 48kHz, mono, 16-bit */
+     private readonly OPUS_SAMPLE_RATE = 48000
+     private readonly OPUS_CHANNELS = 1
+     private readonly OPUS_BIT_DEPTH = 16
+     private readonly OPUS_FRAME_SIZE = 960 /* 20ms at 48kHz = 960 samples */
+     private readonly OPUS_FRAME_BYTES = 960 * 2 /* 16-bit = 2 bytes per sample */
+
+     /* construct node */
+     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+         super(id, cfg, opts, args)
+
+         /* declare node configuration parameters */
+         this.configure({
+             listen: { type: "string", pos: 0, val: "8085", match: /^(?:\d+|.+?:\d+)$/ },
+             path: { type: "string", pos: 1, val: "/webrtc", match: /^\/.+$/ },
+             mode: { type: "string", pos: 2, val: "r", match: /^(?:r|w)$/ },
+             iceServers: { type: "string", pos: 3, val: "", match: /^.*$/ }
+         })
+
+         /* declare node input/output format */
+         if (this.params.mode === "r") {
+             this.input = "none"
+             this.output = "audio"
+         }
+         else if (this.params.mode === "w") {
+             this.input = "audio"
+             this.output = "none"
+         }
+     }
+
+     /* parse address:port string */
+     private parseAddress (addr: string, defaultPort: number): { host: string, port: number } {
+         if (addr.match(/^\d+$/))
+             return { host: "0.0.0.0", port: Number.parseInt(addr, 10) }
+         const m = addr.match(/^(.+?):(\d+)$/)
+         if (m === null)
+             return { host: addr, port: defaultPort }
+         return { host: m[1], port: Number.parseInt(m[2], 10) }
+     }
+
+     /* read HTTP request body */
+     private readRequestBody (req: http.IncomingMessage): Promise<string> {
+         return new Promise((resolve, reject) => {
+             const chunks: Buffer[] = []
+             const maxBodySize = 1024 * 1024 /* 1 MB limit for SDP */
+             let totalSize = 0
+             const onData = (chunk: Buffer) => {
+                 totalSize += chunk.length
+                 if (totalSize > maxBodySize) {
+                     req.removeListener("data", onData)
+                     req.removeListener("end", onEnd)
+                     req.removeListener("error", onError)
+                     req.destroy()
+                     reject(new Error("request body too large"))
+                     return
+                 }
+                 chunks.push(chunk)
+             }
+             const onEnd = () =>
+                 resolve(Buffer.concat(chunks).toString("utf8"))
+             const onError = (err: Error) =>
+                 reject(err)
+             req.on("data", onData)
+             req.on("end", onEnd)
+             req.on("error", onError)
+         })
+     }
+
+     /* decode Opus packet to PCM and enqueue as SpeechFlowChunk */
+     private decodeOpusToChunk (opusPacket: Buffer) {
+         if (this.opusDecoder === null || this.chunkQueue === null)
+             return
+         if (this.params.mode === "w")
+             return
+         try {
+             /* decode Opus to PCM (16-bit signed, little-endian, 48kHz) */
+             const pcmBuffer = this.opusDecoder.decode(opusPacket)
+
+             /* create chunk with timing information (use Opus codec rates, not config) */
+             const now = DateTime.now()
+             const start = now.diff(this.timeZero)
+             const duration = util.audioBufferDuration(pcmBuffer,
+                 this.OPUS_SAMPLE_RATE, this.OPUS_BIT_DEPTH, this.OPUS_CHANNELS)
+             const end = start.plus(duration * 1000)
+             const chunk = new SpeechFlowChunk(start, end, "final", "audio", pcmBuffer)
+             this.chunkQueue.write(chunk)
+         }
+         catch (err: unknown) {
+             this.log("warning", `Opus decode error: ${util.ensureError(err).message}`)
+         }
+     }
+
+     /* buffer PCM and encode to Opus frames, send to all viewers */
+     private bufferAndEncode (chunk: SpeechFlowChunk) {
+         if (this.opusEncoder === null)
+             return
+         const pcm = chunk.payload as Buffer
+         this.pcmBuffer = Buffer.concat([ this.pcmBuffer, pcm ])
+
+         /* prevent unbounded buffer growth */
+         const maxBufferSize = this.OPUS_FRAME_BYTES * 10
+         if (this.pcmBuffer.length > maxBufferSize) {
+             this.log("warning", `PCM buffer overflow (${this.pcmBuffer.length} bytes), discarding excess`)
+             this.pcmBuffer = this.pcmBuffer.subarray(this.pcmBuffer.length - maxBufferSize)
+         }
+
+         /* process full Opus frames from buffer */
+         while (this.pcmBuffer.length >= this.OPUS_FRAME_BYTES) {
+             const frame = this.pcmBuffer.subarray(0, this.OPUS_FRAME_BYTES)
+             this.pcmBuffer = this.pcmBuffer.subarray(this.OPUS_FRAME_BYTES)
+             try {
+                 /* encode PCM to Opus */
+                 const opusPacket = this.opusEncoder.encode(frame)
+                 this.sendOpusToAllViewers(opusPacket)
+             }
+             catch (err: unknown) {
+                 this.log("warning", `Opus encode error: ${util.ensureError(err).message}`)
+             }
+         }
+     }
+
+     /* send Opus packet to all connected WHEP viewers */
+     private sendOpusToAllViewers (opusPacket: Buffer) {
+         /* build RTP header */
+         const rtpHeader = new RtpHeader({
+             version: 2,
+             padding: false,
+             paddingSize: 0,
+             extension: false,
+             marker: true,
+             payloadType: 111, /* Opus payload type */
+             sequenceNumber: this.rtpSequence++ & 0xFFFF,
+             timestamp: this.rtpTimestamp,
+             ssrc: this.rtpSSRC,
+             csrc: [],
+             extensions: []
+         })
+
+         /* build RTP packet */
+         const rtpPacket = new RtpPacket(rtpHeader, opusPacket)
+
+         /* advance timestamp by frame duration */
+         this.rtpTimestamp = (this.rtpTimestamp + this.OPUS_FRAME_SIZE) >>> 0
+
+         /* send to all connected viewers (snapshot to avoid concurrent modification) */
+         const connections = Array.from(this.peerConnections.values())
+         for (const conn of connections) {
+             if (conn.track !== null) {
+                 try {
+                     conn.track.writeRtp(rtpPacket)
+                 }
+                 catch (err: unknown) {
+                     this.log("warning", `failed to send RTP to WebRTC peer: ${util.ensureError(err).message}`)
+                 }
+             }
+         }
+     }
+
+     /* parse ICE servers configuration */
+     private parseIceServers (): { urls: string }[] {
+         if (this.params.iceServers === "")
+             return []
+         let servers: { urls: string }[] = []
+         try {
+             servers = util.importObject("WebRTC ICE servers",
+                 this.params.iceServers,
+                 arktype.type({ urls: "string" }).array())
+         }
+         catch (err: unknown) {
+             this.log("warning", `invalid iceServers JSON: ${util.ensureError(err).message}`)
+             servers = []
+         }
+         return servers
+     }
+
+     /* create a new RTCPeerConnection with standard configuration */
+     private createPeerConnection (resourceId: string): { pc: RTCPeerConnection, subscription: { unSubscribe: () => void } } {
+         const pc = new RTCPeerConnection({
+             iceServers: this.parseIceServers(),
+             headerExtensions: {
+                 audio: [ useSdesMid(), useAbsSendTime() ]
+             }
+         })
+         const subscription = pc.connectionStateChange.subscribe((state: string) => {
+             this.log("info", `WebRTC connection ${resourceId}: ${state}`)
+             if (state === "failed" || state === "closed" || state === "disconnected")
+                 setImmediate(() => {
+                     if (this.peerConnections.has(resourceId))
+                         this.cleanupConnection(resourceId)
+                 })
+         })
+         return { pc, subscription }
+     }
+
+     /* safely close a peer connection */
+     private closePeerConnection (pc: RTCPeerConnection) {
+         util.shield(() => { pc.close() })
+     }
+
+     /* perform SDP negotiation and establish connection */
+     private async performSDPNegotiation (
+         res: http.ServerResponse,
+         offer: string,
+         protocol: "WHIP" | "WHEP",
+         setupFn: (pc: RTCPeerConnection, resourceId: string) => MediaStreamTrack | null
+     ) {
+         /* enforce connection limit */
+         if (this.peerConnections.size >= this.maxConnections) {
+             res.writeHead(503, { "Content-Type": "text/plain" })
+             res.end("Service Unavailable: Maximum connections reached")
+             return
+         }
+
+         /* create peer connection */
+         const resourceId = crypto.randomUUID()
+         const { pc, subscription } = this.createPeerConnection(resourceId)
+
+         /* protocol-specific setup */
+         const track = setupFn(pc, resourceId)
+
+         /* complete SDP offer/answer exchange and establish connection */
+         try {
+             /* set remote description (offer from client) */
+             await pc.setRemoteDescription({ type: "offer", sdp: offer })
+
+             /* create and set local description (answer) */
+             const answer = await pc.createAnswer()
+             await pc.setLocalDescription(answer)
+
+             /* store connection */
+             this.peerConnections.set(resourceId, { pc, track, resourceId, subscription })
+
+             /* return SDP answer */
+             if (pc.localDescription === null || pc.localDescription === undefined)
+                 throw new Error("local description is missing")
+             res.writeHead(201, {
+                 "Content-Type": "application/sdp",
+                 "Location": `${this.params.path}/${resourceId}`
+             })
+             res.end(pc.localDescription.sdp)
+             this.log("info", `${protocol} connection established: ${resourceId}`)
+         }
+         catch (err: unknown) {
+             util.shield(() => { subscription.unSubscribe() })
+             this.closePeerConnection(pc)
+             this.log("error", `${protocol} negotiation failed: ${util.ensureError(err).message}`)
+             res.writeHead(500, { "Content-Type": "text/plain" })
+             res.end("Internal Server Error")
+         }
+     }
+
+     /* handle WHIP POST (receiving audio from publisher) */
+     private async handleWHIP (res: http.ServerResponse, offer: string) {
+         await this.performSDPNegotiation(res, offer, "WHIP", (pc, _resourceId) => {
+             /* handle incoming audio track */
+             pc.ontrack = (event: { track: MediaStreamTrack }) => {
+                 const track = event.track
+                 if (track.kind === "audio") {
+                     this.log("info", `WebRTC audio track received from publisher`)
+
+                     /* subscribe to incoming RTP packets */
+                     track.onReceiveRtp.subscribe((rtpPacket: RtpPacket) => {
+                         this.decodeOpusToChunk(rtpPacket.payload)
+                     })
+                 }
+             }
+             return null
+         })
+     }
+
+     /* handle WHEP POST (sending audio to viewer) */
+     private async handleWHEP (res: http.ServerResponse, offer: string) {
+         await this.performSDPNegotiation(res, offer, "WHEP", (pc, _resourceId) => {
+             /* create outbound audio track */
+             const outboundTrack = new MediaStreamTrack({ kind: "audio" })
+             pc.addTrack(outboundTrack)
+             return outboundTrack
+         })
+     }
+
+     /* handle DELETE (connection teardown) */
+     private handleDELETE (res: http.ServerResponse, resourceId: string) {
+         if (this.peerConnections.has(resourceId)) {
+             this.cleanupConnection(resourceId)
+             res.writeHead(200)
+             res.end()
+             this.log("info", `WebRTC connection terminated: ${resourceId}`)
+         }
+         else {
+             res.writeHead(404, { "Content-Type": "text/plain" })
+             res.end("Not Found")
+         }
+     }
+
+     /* cleanup a peer connection */
+     private cleanupConnection (resourceId: string) {
+         const conn = this.peerConnections.get(resourceId)
+         if (conn === undefined)
+             return
+         this.peerConnections.delete(resourceId)
+         if (conn.subscription !== null)
+             util.shield(() => { conn.subscription?.unSubscribe() })
+         if (conn.track !== null)
+             util.shield(() => { conn.track?.stop() })
+         this.closePeerConnection(conn.pc)
+     }
+
+     /* open node */
+     async open () {
+         /* setup Opus codec */
+         this.opusEncoder = new OpusEncoder(this.OPUS_SAMPLE_RATE, this.OPUS_CHANNELS)
+         this.opusDecoder = new OpusEncoder(this.OPUS_SAMPLE_RATE, this.OPUS_CHANNELS)
+
+         /* initialize RTP state */
+         this.rtpSequence = Math.floor(Math.random() * 0x10000)
+         this.rtpTimestamp = Math.floor(Math.random() * 0x100000000) >>> 0
+         this.rtpSSRC = Math.floor(Math.random() * 0x100000000) >>> 0
+
+         /* setup chunk queue for incoming audio */
+         this.chunkQueue = new util.SingleQueue<SpeechFlowChunk>()
+
+         /* parse listen address */
+         const listen = this.parseAddress(this.params.listen, 8085)
+
+         /* setup HTTP server for WHIP/WHEP signaling */
+         const self = this
+         this.httpServer = http.createServer(async (req, res) => {
+             /* determine URL */
+             if (req.url === undefined) {
+                 res.writeHead(400, { "Content-Type": "text/plain" })
+                 res.end("Bad Request")
+                 return
+             }
+             const host = req.headers.host?.replace(/[^a-zA-Z0-9:.\-_]/g, "") ?? "localhost"
+             const url = new URL(req.url, `http://${host}`)
+             const pathMatch = url.pathname === self.params.path
+             const resourceMatch = url.pathname.startsWith(self.params.path + "/")
+
+             /* CORS headers for browser clients */
+             res.setHeader("Access-Control-Allow-Origin", "*")
+             res.setHeader("Access-Control-Allow-Methods", "POST, DELETE, OPTIONS")
+             res.setHeader("Access-Control-Allow-Headers", "Content-Type")
+             res.setHeader("Access-Control-Expose-Headers", "Location")
+
+             /* handle CORS preflight */
+             if (req.method === "OPTIONS") {
+                 res.writeHead(204)
+                 res.end()
+                 return
+             }
+
+             /* handle requests... */
+             if (req.method === "POST" && pathMatch) {
+                 /* handle WHIP/WHEP POST */
+                 const body = await self.readRequestBody(req)
+
+                 /* sanity check content type */
+                 const contentType = req.headers["content-type"]
+                 if (contentType !== "application/sdp") {
+                     res.writeHead(415, { "Content-Type": "text/plain" })
+                     res.end("Unsupported Media Type")
+                     return
+                 }
+
+                 /* determine if WHIP (receiving) or WHEP (sending) based on SDP content */
+                 const hasSendonly = /\ba=sendonly\b/m.test(body)
+                 const hasSendrecv = /\ba=sendrecv\b/m.test(body)
+                 const hasRecvonly = /\ba=recvonly\b/m.test(body)
+                 const isPublisher = hasSendonly || hasSendrecv
+                 const isViewer = hasRecvonly
+
+                 /* handle protocol based on mode */
+                 if (self.params.mode === "r" && isPublisher)
+                     /* in read mode, accept WHIP publishers */
+                     await self.handleWHIP(res, body)
+                 else if (self.params.mode === "w" && isViewer)
+                     /* in write mode, accept WHEP viewers */
+                     await self.handleWHEP(res, body)
+                 else {
+                     res.writeHead(403, { "Content-Type": "text/plain" })
+                     res.end("Forbidden")
+                 }
+             }
+             else if (req.method === "DELETE" && resourceMatch) {
+                 /* handle DELETE for connection teardown */
+                 const resourceId = url.pathname.substring(self.params.path.length + 1)
+                 self.handleDELETE(res, resourceId)
+             }
+             else {
+                 /* handle unknown requests */
+                 res.writeHead(404, { "Content-Type": "text/plain" })
+                 res.end("Not Found")
+             }
+         })
+
+         /* start HTTP server */
+         await new Promise<void>((resolve) => {
+             this.httpServer!.listen(listen.port, listen.host, () => {
+                 const mode = this.params.mode === "r" ? "WHIP" : "WHEP"
+                 this.log("info", `WebRTC ${mode} server listening on http://${listen.host}:${listen.port}${this.params.path}`)
+                 resolve()
+             })
+         })
+
+         /* create duplex stream */
+         const reads = new util.PromiseSet<void>()
+         this.stream = new Stream.Duplex({
+             writableObjectMode: true,
+             readableObjectMode: true,
+             decodeStrings: false,
+             highWaterMark: 1,
+             write (chunk: SpeechFlowChunk, encoding, callback) {
+                 if (self.params.mode === "r") {
+                     callback(new Error("write operation on read mode node"))
+                     return
+                 }
+                 if (chunk.type !== "audio") {
+                     callback(new Error("WebRTC node only supports audio type"))
+                     return
+                 }
+                 if (self.peerConnections.size === 0) {
+                     /* silently drop if no viewers connected */
+                     callback()
+                     return
+                 }
+                 self.bufferAndEncode(chunk)
+                 callback()
+             },
+             async final (callback) {
+                 await reads.awaitAll()
+                 callback()
+             },
+             read (size: number) {
+                 if (self.params.mode === "w") {
+                     self.log("error", "read operation on write mode node")
+                     this.push(null)
+                     return
+                 }
+                 reads.add(self.chunkQueue!.read().then((chunk) => {
+                     this.push(chunk, "binary")
+                 }).catch((err: Error) => {
+                     self.log("warning", `read on chunk queue operation failed: ${err}`)
+                     this.push(null)
+                 }))
+             }
+         })
+     }
+
+     /* close node */
+     async close () {
+         /* close all peer connections */
+         for (const resourceId of Array.from(this.peerConnections.keys()))
+             this.cleanupConnection(resourceId)
+
+         /* close HTTP server */
+         if (this.httpServer !== null) {
+             await new Promise<void>((resolve, reject) => {
+                 this.httpServer!.close((err) => {
+                     if (err) reject(err)
+                     else resolve()
+                 })
+             }).catch((err: Error) => {
+                 this.log("warning", `failed to close HTTP server: ${err.message}`)
+             })
+             this.httpServer = null
+         }
+
+         /* drain and clear chunk queue */
+         if (this.chunkQueue !== null) {
+             this.chunkQueue.drain()
+             this.chunkQueue = null
+         }
+
+         /* cleanup codec instances */
+         this.opusEncoder = null
+         this.opusDecoder = null
+         this.pcmBuffer = Buffer.alloc(0)
+
+         /* shutdown stream */
+         if (this.stream !== null) {
+             await util.destroyStream(this.stream)
+             this.stream = null
+         }
+     }
+ }
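
For orientation, the new xio-webrtc node speaks standard WHIP/WHEP signaling: a client POSTs an SDP offer as application/sdp to the configured path, receives a 201 response whose body is the SDP answer and whose Location header names the session resource, and later DELETEs that resource to tear the session down. Below is a minimal sketch of a browser-side WHIP publisher against the node's defaults (listen "8085", path "/webrtc", mode "r"); the WHIP_ENDPOINT constant and the publish() helper are illustrative only and are not part of SpeechFlow.

const WHIP_ENDPOINT = "http://localhost:8085/webrtc"  /* node defaults: listen "8085", path "/webrtc" */

async function publish (): Promise<() => Promise<void>> {
    /* capture microphone audio and offer it send-only, which the node
       classifies as a WHIP publisher when running in mode "r" */
    const media = await navigator.mediaDevices.getUserMedia({ audio: true })
    const pc = new RTCPeerConnection()
    for (const track of media.getTracks())
        pc.addTransceiver(track, { direction: "sendonly" })

    /* create the SDP offer and wait for ICE gathering to finish,
       since this sketch does not use trickle ICE */
    await pc.setLocalDescription(await pc.createOffer())
    await new Promise<void>((resolve) => {
        if (pc.iceGatheringState === "complete") { resolve(); return }
        pc.addEventListener("icegatheringstatechange", () => {
            if (pc.iceGatheringState === "complete") resolve()
        })
    })

    /* POST the offer as application/sdp; the node answers with
       HTTP 201, the SDP answer as body, and a Location header */
    const response = await fetch(WHIP_ENDPOINT, {
        method: "POST",
        headers: { "Content-Type": "application/sdp" },
        body: pc.localDescription?.sdp ?? ""
    })
    if (response.status !== 201)
        throw new Error(`WHIP negotiation failed: HTTP ${response.status}`)
    await pc.setRemoteDescription({ type: "answer", sdp: await response.text() })

    /* return a teardown function which DELETEs the session resource */
    const location = response.headers.get("Location")
    return async () => {
        if (location !== null)
            await fetch(new URL(location, WHIP_ENDPOINT).toString(), { method: "DELETE" })
        pc.close()
    }
}

The WHEP viewer side is symmetric: against a node in mode "w" the client adds a recvonly audio transceiver instead and plays the remote track it receives.
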
@@ -15,7 +15,7 @@ import ReconnWebSocket, { ErrorEvent } from "@opensumi/reconnecting-websocket"
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
  import * as util from "./speechflow-util"

- /* SpeechFlow node for Websocket networking */
+ /* SpeechFlow node for WebSocket networking */
  export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
      /* declare official node name */
      public static name = "xio-websocket"
@@ -38,9 +38,9 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {

          /* sanity check parameters */
          if (this.params.listen !== "" && this.params.connect !== "")
-             throw new Error("Websocket node cannot listen and connect at the same time")
+             throw new Error("WebSocket node cannot listen and connect at the same time")
          else if (this.params.listen === "" && this.params.connect === "")
-             throw new Error("Websocket node requires either listen or connect mode")
+             throw new Error("WebSocket node requires either listen or connect mode")

          /* declare node input/output format */
          if (this.params.mode === "rw") {
@@ -121,7 +121,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
                  else if (chunk.type !== self.params.type)
                      callback(new Error(`written chunk is not of ${self.params.type} type`))
                  else if (websockets.size === 0)
-                     callback(new Error("still no Websocket connections available"))
+                     callback(new Error("still no WebSocket connections available"))
                  else {
                      const data = util.streamChunkEncode(chunk)
                      const results: Promise<void>[] = []
@@ -168,10 +168,10 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
             connectionTimeout: 4000,
             minUptime: 5000
         })
-         this.client.addEventListener("open", (ev) => {
+         this.client.addEventListener("open", (_ev) => {
             this.log("info", `connection opened to URL ${this.params.connect}`)
         })
-         this.client.addEventListener("close", (ev) => {
+         this.client.addEventListener("close", (_ev) => {
             this.log("info", `connection closed to URL ${this.params.connect}`)
         })
         this.client.addEventListener("error", (ev: ErrorEvent) => {
@@ -208,7 +208,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
                  else if (chunk.type !== self.params.type)
                      callback(new Error(`written chunk is not of ${self.params.type} type`))
                  else if (!self.client!.OPEN)
-                     callback(new Error("still no Websocket connection available"))
+                     callback(new Error("still no WebSocket connection available"))
                  else {
                      const data = util.streamChunkEncode(chunk)
                      self.client!.send(data)
@@ -234,7 +234,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {

      /* close node */
      async close () {
-         /* close Websocket server */
+         /* close WebSocket server */
          if (this.server !== null) {
              await new Promise<void>((resolve, reject) => {
                  this.server!.close((error) => {
@@ -245,7 +245,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
              this.server = null
          }

-         /* close Websocket client */
+         /* close WebSocket client */
          if (this.client !== null) {
              this.client.close()
              this.client = null
@@ -10,6 +10,9 @@ import path from "node:path"
  /* external dependencies */
  import { AudioContext, AudioWorkletNode } from "node-web-audio-api"

+ /* internal dependencies */
+ import { shield } from "./speechflow-util-error"
+
  /* calculate duration of an audio buffer */
  export function audioBufferDuration (
      buffer: Buffer,
@@ -159,6 +162,9 @@ export function updateEnvelopeForChannel (
          else
              currentEnv = alphaR * currentEnv + (1 - alphaR) * det
      }
+
+     /* store updated envelope value back */
+     env[chan] = currentEnv
      return Math.sqrt(Math.max(currentEnv, 1e-12))
  }

@@ -170,6 +176,7 @@ export function dB2lin (db: number): number {
      return Math.pow(10, db / 20)
  }

+ /* Web Audio API wrapper class */
  export class WebAudio {
      /* internal state */
      public audioContext: AudioContext
@@ -278,18 +285,16 @@ export class WebAudio {
          })
      }

+     /* destroy object */
      public async destroy (): Promise<void> {
          /* reject all pending promises */
-         try {
+         shield(() => {
              this.pendingPromises.forEach(({ reject, timeout }) => {
                  clearTimeout(timeout)
                  reject(new Error("WebAudio destroyed"))
              })
              this.pendingPromises.clear()
-         }
-         catch (_err) {
-             /* ignored -- cleanup during shutdown */
-         }
+         })

          /* disconnect nodes */
          if (this.sourceNode !== null) {
@@ -188,3 +188,12 @@ export function runner<T> (
          return run(() => action(...args), oncatch, onfinally)
      }
  }
+
+ /* shield cleanup operation, ignoring errors */
+ export function shield<T extends (void | Promise<void>)> (op: () => T) {
+     return run(
+         "shielded operation",
+         () => { op() },
+         (_err) => { /* ignore error */ }
+     )
+ }
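
The shield() helper added here is what the WebAudio.destroy() rewrite above and the new WebRTC node rely on for best-effort teardown. A minimal sketch of the calling pattern (the resource object is purely illustrative):

import { shield } from "./speechflow-util-error"

/* best-effort cleanup: a throwing close() must not abort shutdown */
function teardown (resource: { close: () => void }) {
    shield(() => { resource.close() })
}
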