speechflow 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. package/CHANGELOG.md +37 -0
  2. package/README.md +273 -7
  3. package/etc/claude.md +70 -0
  4. package/etc/speechflow.png +0 -0
  5. package/etc/speechflow.yaml +29 -11
  6. package/etc/stx.conf +7 -0
  7. package/package.json +7 -6
  8. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
  9. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
  10. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
  12. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
  14. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
  15. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
  16. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
  17. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
  18. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
  19. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
  20. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
  21. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
  22. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
  23. package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
  24. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
  25. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
  26. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
  27. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  28. package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
  29. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
  30. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
  31. package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
  32. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
  33. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
  34. package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
  35. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
  36. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  37. package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +35 -53
  39. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  40. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
  41. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  42. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
  43. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
  44. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
  45. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
  46. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
  47. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
  48. package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
  49. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
  50. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
  51. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
  52. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  53. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
  54. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
  56. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
  57. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
  58. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +16 -33
  59. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  60. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
  61. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
  62. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
  63. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
  64. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
  65. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
  66. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
  67. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  68. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
  69. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  70. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
  71. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
  72. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
  73. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
  74. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  75. package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
  76. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  77. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
  78. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  79. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
  80. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  81. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
  82. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  83. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
  84. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  85. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
  86. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
  87. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
  88. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
  89. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  90. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
  91. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  92. package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
  93. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  94. package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
  95. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
  96. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  97. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
  98. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  99. package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
  100. package/speechflow-cli/dst/speechflow-node.js +13 -2
  101. package/speechflow-cli/dst/speechflow-node.js.map +1 -1
  102. package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
  103. package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
  104. package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
  105. package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
  106. package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
  107. package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
  108. package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
  109. package/speechflow-cli/dst/speechflow-utils.js +123 -35
  110. package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
  111. package/speechflow-cli/dst/speechflow.js +114 -27
  112. package/speechflow-cli/dst/speechflow.js.map +1 -1
  113. package/speechflow-cli/etc/oxlint.jsonc +112 -11
  114. package/speechflow-cli/etc/stx.conf +2 -2
  115. package/speechflow-cli/etc/tsconfig.json +1 -1
  116. package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
  117. package/speechflow-cli/package.json +102 -94
  118. package/speechflow-cli/src/lib.d.ts +24 -0
  119. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
  120. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
  121. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
  122. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
  123. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
  124. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
  125. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
  126. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
  127. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +37 -56
  128. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
  129. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
  130. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
  131. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
  132. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
  133. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
  134. package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
  135. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +16 -33
  136. package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
  137. package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
  138. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
  139. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
  140. package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
  141. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
  142. package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
  143. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
  144. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
  145. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
  146. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
  147. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
  148. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
  149. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
  150. package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
  151. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
  152. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
  153. package/speechflow-cli/src/speechflow-node.ts +21 -8
  154. package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
  155. package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
  156. package/speechflow-cli/src/speechflow-utils.ts +125 -32
  157. package/speechflow-cli/src/speechflow.ts +118 -30
  158. package/speechflow-ui-db/dst/index.css +1 -1
  159. package/speechflow-ui-db/dst/index.js +31 -31
  160. package/speechflow-ui-db/etc/eslint.mjs +0 -1
  161. package/speechflow-ui-db/etc/tsc-client.json +3 -3
  162. package/speechflow-ui-db/package.json +11 -10
  163. package/speechflow-ui-db/src/app.vue +96 -78
  164. package/speechflow-ui-st/dst/index.js +26 -26
  165. package/speechflow-ui-st/etc/eslint.mjs +0 -1
  166. package/speechflow-ui-st/etc/tsc-client.json +3 -3
  167. package/speechflow-ui-st/package.json +11 -10
  168. package/speechflow-ui-st/src/app.vue +5 -12

package/speechflow-cli/src/speechflow-node-a2a-meter.ts

@@ -22,7 +22,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
  /* internal state */
  private emitInterval: ReturnType<typeof setInterval> | null = null
  private calcInterval: ReturnType<typeof setInterval> | null = null
- private pendingCalculations = new Set<ReturnType<typeof setTimeout>>()
+ private silenceTimer: ReturnType<typeof setTimeout> | null = null
  private chunkBuffer = new Float32Array(0)
  private destroyed = false
 
@@ -56,7 +56,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
  let sampleWindow = new Float32Array(sampleWindowSize)
  sampleWindow.fill(0, 0, sampleWindowSize)
  let lufss = -60
- let rms = -60
+ let rms = -60
 
  /* chunk processing state */
  const chunkDuration = 0.050 /* meter update frequency is about 50ms */
@@ -67,50 +67,33 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
  const processChunk = (chunkData: Float32Array) => {
  /* update internal audio sample sliding window */
  const newWindow = new Float32Array(sampleWindowSize)
- const keepSize = sampleWindowSize - chunkData.length
- newWindow.set(sampleWindow.slice(sampleWindow.length - keepSize), 0)
- newWindow.set(chunkData, keepSize)
+ newWindow.set(sampleWindow.slice(chunkData.length), 0)
+ newWindow.set(chunkData, sampleWindowSize - chunkData.length)
  sampleWindow = newWindow
 
- /* asynchronously calculate the LUFS-S metric */
- const calculator = setTimeout(() => {
- if (this.destroyed)
- return
- try {
- this.pendingCalculations.delete(calculator)
- const audioData = {
- sampleRate: this.config.audioSampleRate,
- numberOfChannels: this.config.audioChannels,
- channelData: [ sampleWindow ],
- duration: sampleWindowDuration,
- length: sampleWindow.length
- } satisfies AudioData
- const lufs = getLUFS(audioData, {
- channelMode: this.config.audioChannels === 1 ? "mono" : "stereo",
- calculateShortTerm: true,
- calculateMomentary: false,
- calculateLoudnessRange: false,
- calculateTruePeak: false
- })
- if (!this.destroyed) {
- if (timer !== null) {
- clearTimeout(timer)
- timer = null
- }
- lufss = lufs.shortTerm ? lufs.shortTerm[0] : 0
- rms = getRMS(audioData, { asDB: true })
- timer = setTimeout(() => {
- lufss = -60
- rms = -60
- }, 500)
- }
- }
- catch (error) {
- if (!this.destroyed)
- this.log("warning", `meter calculation error: ${error}`)
- }
- }, 0)
- this.pendingCalculations.add(calculator)
+ /* calculate the LUFS-S and RMS metric */
+ const audioData = {
+ sampleRate: this.config.audioSampleRate,
+ numberOfChannels: this.config.audioChannels,
+ channelData: [ sampleWindow ],
+ duration: sampleWindowDuration,
+ length: sampleWindow.length
+ } satisfies AudioData
+ const lufs = getLUFS(audioData, {
+ channelMode: this.config.audioChannels === 1 ? "mono" : "stereo",
+ calculateShortTerm: true,
+ calculateMomentary: false,
+ calculateLoudnessRange: false,
+ calculateTruePeak: false
+ })
+ lufss = lufs.shortTerm ? lufs.shortTerm[0] : -60
+ rms = getRMS(audioData, { asDB: true })
+ if (this.silenceTimer !== null)
+ clearTimeout(this.silenceTimer)
+ this.silenceTimer = setTimeout(() => {
+ lufss = -60
+ rms = -60
+ }, 500)
  }
 
  /* setup chunking interval */
@@ -121,8 +104,8 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
  /* process one single 50ms chunk if available */
  if (this.chunkBuffer.length >= samplesPerChunk) {
  const chunkData = this.chunkBuffer.slice(0, samplesPerChunk)
- processChunk(chunkData)
  this.chunkBuffer = this.chunkBuffer.slice(samplesPerChunk)
+ processChunk(chunkData)
  }
  }, chunkDuration * 1000)
 
@@ -134,12 +117,11 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
  this.sendResponse([ "meter", "LUFS-S", lufss ])
  this.sendResponse([ "meter", "RMS", rms ])
  if (this.params.dashboard !== "")
- this.dashboardInfo("audio", this.params.dashboard, "final", lufss)
+ this.sendDashboard("audio", this.params.dashboard, "final", lufss)
  }, this.params.interval)
 
  /* provide Duplex stream and internally attach to meter */
  const self = this
- let timer: ReturnType<typeof setTimeout> | null = null
  this.stream = new Stream.Transform({
  writableObjectMode: true,
  readableObjectMode: true,
@@ -173,7 +155,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
  callback()
  }
  catch (error) {
- callback(error instanceof Error ? error : new Error("Meter processing failed"))
+ callback(error instanceof Error ? error : new Error("meter processing failed"))
  }
  }
  },
@@ -190,14 +172,6 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
 
  /* close node */
  async close () {
- /* indicate destruction */
- this.destroyed = true
-
- /* clear all pending calculations */
- for (const timeout of this.pendingCalculations)
- clearTimeout(timeout)
- this.pendingCalculations.clear()
-
  /* stop intervals */
  if (this.emitInterval !== null) {
  clearInterval(this.emitInterval)
@@ -207,11 +181,18 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
  clearInterval(this.calcInterval)
  this.calcInterval = null
  }
+ if (this.silenceTimer !== null) {
+ clearTimeout(this.silenceTimer)
+ this.silenceTimer = null
+ }
 
  /* close stream */
  if (this.stream !== null) {
  this.stream.destroy()
  this.stream = null
  }
+
+ /* indicate destruction */
+ this.destroyed = true
  }
  }
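
The rewritten sliding-window update in processChunk() above can be read as: drop the oldest chunkData.length samples, shift the rest to the front of the window, and append the new chunk at the end. A small worked example of the index arithmetic, with illustrative numbers that are not taken from this diff (a 3 s window at 48 kHz and one 50 ms chunk):

    /* illustrative numbers only, not SpeechFlow defaults */
    const sampleWindowSize = 3 * 48000           /* 144000 samples kept for LUFS-S */
    const chunkData = new Float32Array(2400)     /* 50 ms * 48000 Hz = 2400 samples */
    let sampleWindow = new Float32Array(sampleWindowSize)

    const newWindow = new Float32Array(sampleWindowSize)
    newWindow.set(sampleWindow.slice(chunkData.length), 0)         /* keep the newest 141600 samples */
    newWindow.set(chunkData, sampleWindowSize - chunkData.length)  /* append the chunk at offset 141600 */
    sampleWindow = newWindow
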

package/speechflow-cli/src/speechflow-node-a2a-mute.ts

@@ -43,9 +43,10 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
  throw new Error("mute: node already destroyed")
  try {
  if (params.length === 2 && params[0] === "mode") {
- if (!params[1].match(/^(?:none|silenced|unplugged)$/))
+ if (typeof params[1] !== "string" ||
+ !params[1].match(/^(?:none|silenced|unplugged)$/))
  throw new Error("mute: invalid mode argument in external request")
- const muteMode: MuteMode = params[1] as MuteMode
+ const muteMode = params[1] as MuteMode
  this.setMuteMode(muteMode)
  this.sendResponse([ "mute", "mode", muteMode ])
  }

package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts (new file)

@@ -0,0 +1,62 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* internal dependencies */
+ import { parentPort } from "node:worker_threads"
+
+ /* external dependencies */
+ import { type DenoiseState, Rnnoise } from "@shiguredo/rnnoise-wasm"
+
+ /* WASM state */
+ let rnnoise: Rnnoise
+ let denoiseState: DenoiseState
+
+ /* global initialization */
+ ;(async () => {
+ try {
+ rnnoise = await Rnnoise.load()
+ denoiseState = rnnoise.createDenoiseState()
+ parentPort!.postMessage({ type: "ready" })
+ }
+ catch (err) {
+ parentPort!.postMessage({ type: "failed", message: `failed to initialize RNNoise: ${err}` })
+ process.exit(1)
+ }
+ })()
+
+ /* receive messages */
+ parentPort!.on("message", (msg) => {
+ if (msg.type === "process") {
+ /* process a single audio frame */
+ const { id, data } = msg
+
+ /* convert regular Int16Array [-32768,32768]
+ to unusual non-normalized Float32Array [-32768,32768]
+ as required by RNNoise */
+ const f32a = new Float32Array(data.length)
+ for (let i = 0; i < data.length; i++)
+ f32a[i] = data[i]
+
+ /* process frame with RNNoise WASM */
+ denoiseState.processFrame(f32a)
+
+ /* convert back Float32Array to Int16Array */
+ const i16 = new Int16Array(data.length)
+ for (let i = 0; i < data.length; i++)
+ i16[i] = Math.round(f32a[i])
+
+ parentPort!.postMessage({ type: "process-done", id, data: i16 }, [ i16.buffer ])
+ }
+ else if (msg.type === "close") {
+ /* shutdown this process */
+ try {
+ denoiseState.destroy()
+ }
+ finally {
+ process.exit(0)
+ }
+ }
+ })

package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts (new file)

@@ -0,0 +1,164 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import Stream from "node:stream"
+ import { Worker } from "node:worker_threads"
+ import { resolve } from "node:path"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+ import * as utils from "./speechflow-utils"
+
+ /* SpeechFlow node for RNNoise based noise suppression in audio-to-audio passing */
+ export default class SpeechFlowNodeRNNoise extends SpeechFlowNode {
+ /* declare official node name */
+ public static name = "rnnoise"
+
+ /* internal state */
+ private destroyed = false
+ private sampleSize = 480 /* = 10ms at 48KHz, as required by RNNoise! */
+ private worker: Worker | null = null
+
+ /* construct node */
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+ super(id, cfg, opts, args)
+
+ /* declare node configuration parameters */
+ this.configure({})
+
+ /* declare node input/output format */
+ this.input = "audio"
+ this.output = "audio"
+ }
+
+ /* open node */
+ async open () {
+ /* clear destruction flag */
+ this.destroyed = false
+
+ /* initialize worker */
+ this.worker = new Worker(resolve(__dirname, "speechflow-node-a2a-rnnoise-wt.js"))
+ this.worker.on("error", (err) => {
+ this.log("error", `RNNoise worker thread error: ${err}`)
+ })
+ this.worker.on("exit", (code) => {
+ if (code !== 0)
+ this.log("error", `RNNoise worker thread exited with error code ${code}`)
+ else
+ this.log("info", `RNNoise worker thread exited with regular code ${code}`)
+ })
+ await new Promise<void>((resolve, reject) => {
+ const timeout = setTimeout(() => {
+ reject(new Error("RNNoise worker thread initialization timeout"))
+ }, 5000)
+ this.worker!.once("message", (msg: any) => {
+ clearTimeout(timeout)
+ if (typeof msg === "object" && msg !== null && msg.type === "ready")
+ resolve()
+ else if (typeof msg === "object" && msg !== null && msg.type === "failed")
+ reject(new Error(msg.message ?? "RNNoise worker thread initialization failed"))
+ else
+ reject(new Error(`RNNoise worker thread sent unexpected message on startup`))
+ })
+ this.worker!.once("error", (err) => {
+ clearTimeout(timeout)
+ reject(err)
+ })
+ })
+
+ /* receive message from worker */
+ const pending = new Map<string, (arr: Int16Array<ArrayBuffer>) => void>()
+ this.worker.on("message", (msg: any) => {
+ if (typeof msg === "object" && msg !== null && msg.type === "process-done") {
+ const cb = pending.get(msg.id)
+ pending.delete(msg.id)
+ if (cb)
+ cb(msg.data)
+ else
+ this.log("warning", `RNNoise worker thread sent back unexpected id: ${msg.id}`)
+ }
+ else
+ this.log("warning", `RNNoise worker thread sent unexpected message: ${JSON.stringify(msg)}`)
+ })
+
+ /* send message to worker */
+ let seq = 0
+ const workerProcessSegment = async (segment: Int16Array<ArrayBuffer>) => {
+ if (this.destroyed)
+ return segment
+ const id = `${seq++}`
+ return new Promise<Int16Array<ArrayBuffer>>((resolve) => {
+ pending.set(id, (segment: Int16Array<ArrayBuffer>) => { resolve(segment) })
+ this.worker!.postMessage({ type: "process", id, data: segment }, [ segment.buffer ])
+ })
+ }
+
+ /* establish a transform stream */
+ const self = this
+ this.stream = new Stream.Transform({
+ readableObjectMode: true,
+ writableObjectMode: true,
+ decodeStrings: false,
+ transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+ if (self.destroyed) {
+ callback(new Error("stream already destroyed"))
+ return
+ }
+ if (!Buffer.isBuffer(chunk.payload))
+ callback(new Error("invalid chunk payload type"))
+ else {
+ /* convert Buffer into Int16Array */
+ const payload = utils.convertBufToI16(chunk.payload)
+
+ /* process Int16Array in necessary segments */
+ utils.processInt16ArrayInSegments(payload, self.sampleSize, (segment) =>
+ workerProcessSegment(segment)
+ ).then((payload: Int16Array<ArrayBuffer>) => {
+ /* convert Int16Array into Buffer */
+ const buf = utils.convertI16ToBuf(payload)
+
+ /* update chunk */
+ chunk.payload = buf
+
+ /* forward updated chunk */
+ this.push(chunk)
+ callback()
+ }).catch((err: Error) => {
+ self.log("warning", `processing of chunk failed: ${err}`)
+ callback(err)
+ })
+ }
+ },
+ final (callback) {
+ if (self.destroyed) {
+ callback()
+ return
+ }
+ this.push(null)
+ callback()
+ }
+ })
+ }
+
+ /* close node */
+ async close () {
+ /* indicate destruction */
+ this.destroyed = true
+
+ /* shutdown worker */
+ if (this.worker !== null) {
+ this.worker.terminate()
+ this.worker = null
+ }
+
+ /* close stream */
+ if (this.stream !== null) {
+ this.stream.destroy()
+ this.stream = null
+ }
+ }
+ }
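
Both this node and the speex node below delegate chunk splitting to utils.processInt16ArrayInSegments(), whose implementation lives in speechflow-utils.ts and is not part of this excerpt. The following is a hypothetical sketch of such a helper, based only on how it is called here: fixed-size segments, an async per-segment processor, and a reassembled result. How the real helper treats a trailing partial segment is an assumption; this sketch simply copies it through unprocessed.

    /* hypothetical sketch -- not the actual speechflow-utils.ts implementation */
    export async function processInt16ArrayInSegments (
        input: Int16Array<ArrayBuffer>,
        segmentSize: number,
        processor: (segment: Int16Array<ArrayBuffer>) => Promise<Int16Array<ArrayBuffer>>
    ): Promise<Int16Array<ArrayBuffer>> {
        const output = new Int16Array(input.length)
        let offset = 0
        while (offset + segmentSize <= input.length) {
            /* copy out the segment, since callers may transfer its buffer to a worker */
            const segment = input.slice(offset, offset + segmentSize)
            const processed = await processor(segment)
            output.set(processed, offset)
            offset += segmentSize
        }
        /* assumption: a trailing partial segment is passed through unprocessed */
        if (offset < input.length)
            output.set(input.subarray(offset), offset)
        return output
    }

Awaiting each segment sequentially keeps the output in order even when the processor hands the work to a worker thread, as the rnnoise node above does.
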

package/speechflow-cli/src/speechflow-node-a2a-speex.ts (new file)

@@ -0,0 +1,137 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import path from "node:path"
+ import fs from "node:fs"
+ import Stream from "node:stream"
+
+ /* external dependencies */
+ import { loadSpeexModule, SpeexPreprocessor } from "@sapphi-red/speex-preprocess-wasm"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+ import * as utils from "./speechflow-utils"
+
+ /* SpeechFlow node for Speex based noise suppression in audio-to-audio passing */
+ export default class SpeechFlowNodeSpeex extends SpeechFlowNode {
+ /* declare official node name */
+ public static name = "speex"
+
+ /* internal state */
+ private destroyed = false
+ private sampleSize = 480 /* = 10ms at 48KHz */
+ private speexProcessor: SpeexPreprocessor | null = null
+
+ /* construct node */
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+ super(id, cfg, opts, args)
+
+ /* declare node configuration parameters */
+ this.configure({
+ attenuate: { type: "number", val: -18, pos: 0, match: (n: number) => n >= -60 && n <= 0 },
+ })
+
+ /* declare node input/output format */
+ this.input = "audio"
+ this.output = "audio"
+ }
+
+ /* open node */
+ async open () {
+ /* clear destruction flag */
+ this.destroyed = false
+
+ /* validate sample rate compatibility */
+ if (this.config.audioSampleRate !== 48000)
+ throw new Error(`Speex node requires 48KHz sample rate, got ${this.config.audioSampleRate}Hz`)
+
+ /* initialize and configure Speex pre-processor */
+ const wasmBinary = await fs.promises.readFile(
+ path.join(__dirname, "../node_modules/@sapphi-red/speex-preprocess-wasm/dist/speex.wasm"))
+ const speexModule = await loadSpeexModule({
+ wasmBinary: wasmBinary.buffer as ArrayBuffer
+ })
+ this.speexProcessor = new SpeexPreprocessor(
+ speexModule, this.sampleSize, this.config.audioSampleRate)
+ this.speexProcessor.denoise = true
+ this.speexProcessor.noiseSuppress = this.params.attenuate
+ this.speexProcessor.agc = false
+ this.speexProcessor.vad = false
+ this.speexProcessor.echoSuppress = 0
+ this.speexProcessor.echoSuppressActive = 0
+
+ /* establish a transform stream */
+ const self = this
+ this.stream = new Stream.Transform({
+ readableObjectMode: true,
+ writableObjectMode: true,
+ decodeStrings: false,
+ transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+ if (self.destroyed) {
+ callback(new Error("stream already destroyed"))
+ return
+ }
+ if (!Buffer.isBuffer(chunk.payload))
+ callback(new Error("invalid chunk payload type"))
+ else {
+ /* convert Buffer into Int16Array */
+ const payload = utils.convertBufToI16(chunk.payload)
+
+ /* process Int16Array in necessary fixed-size segments */
+ utils.processInt16ArrayInSegments(payload, self.sampleSize, (segment) => {
+ if (self.destroyed)
+ throw new Error("stream already destroyed")
+ self.speexProcessor?.processInt16(segment)
+ return Promise.resolve(segment)
+ }).then((payload: Int16Array<ArrayBuffer>) => {
+ if (self.destroyed)
+ throw new Error("stream already destroyed")
+
+ /* convert Int16Array back into Buffer */
+ const buf = utils.convertI16ToBuf(payload)
+
+ /* update chunk */
+ chunk.payload = buf
+
+ /* forward updated chunk */
+ this.push(chunk)
+ callback()
+ }).catch((err: Error) => {
+ self.log("warning", `processing of chunk failed: ${err}`)
+ callback(err)
+ })
+ }
+ },
+ final (callback) {
+ if (self.destroyed) {
+ callback()
+ return
+ }
+ this.push(null)
+ callback()
+ }
+ })
+ }
+
+ /* close node */
+ async close () {
+ /* indicate destruction */
+ this.destroyed = true
+
+ /* destroy processor */
+ if (this.speexProcessor !== null) {
+ this.speexProcessor.destroy()
+ this.speexProcessor = null
+ }
+
+ /* close stream */
+ if (this.stream !== null) {
+ this.stream.destroy()
+ this.stream = null
+ }
+ }
+ }
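
Both noise-suppression nodes above use the same fixed frame size of 480 samples. The arithmetic behind that constant, for the 48 kHz sample rate the speex node enforces:

    const sampleRate = 48000                         /* Hz, enforced by the speex node above */
    const frameDuration = 0.010                      /* s, frame length required by RNNoise and
                                                        used to construct the Speex preprocessor */
    const sampleSize = sampleRate * frameDuration    /* = 480 samples per frame */
    /* e.g. a 20 ms chunk of 960 samples is processed as two 480-sample segments */
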

package/speechflow-cli/src/speechflow-node-a2a-vad.ts

@@ -205,7 +205,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
  if ((chunks * chunkSize) < data.length) {
  const frame = new Float32Array(chunkSize)
  frame.fill(0)
- frame.set(data.slice(chunks * chunkSize, data.length))
+ frame.set(data.slice(chunks * chunkSize))
  const segment: AudioQueueElementSegment = { data: frame }
  segmentData.push(segment)
  }
@@ -315,7 +315,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
  && element.type === "audio-frame"
  && element.isSpeech !== undefined)
  flushPendingChunks()
- else if (!self.destroyed) {
+ else if (!self.destroyed && !self.activeEventListeners.has(awaitForthcomingChunks)) {
  self.queue.once("write", awaitForthcomingChunks)
  self.activeEventListeners.add(awaitForthcomingChunks)
  }
@@ -328,7 +328,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
  && element.type === "audio-frame"
  && element.isSpeech !== undefined)
  flushPendingChunks()
- else if (!self.destroyed) {
+ else if (!self.destroyed && !self.activeEventListeners.has(awaitForthcomingChunks)) {
  self.queue.once("write", awaitForthcomingChunks)
  self.activeEventListeners.add(awaitForthcomingChunks)
  }
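
The added activeEventListeners.has() guard presumably exists because Node's EventEmitter does not deduplicate listeners: registering the same function with once() twice makes it fire once per registration. A minimal stand-alone illustration of that behavior (not SpeechFlow code):

    import { EventEmitter } from "node:events"

    const queue = new EventEmitter()
    const awaitForthcomingChunks = () => { console.log("woken up") }

    queue.once("write", awaitForthcomingChunks)
    queue.once("write", awaitForthcomingChunks)   /* duplicate registration is kept */
    queue.emit("write")                           /* logs "woken up" twice */
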

package/speechflow-cli/src/speechflow-node-a2a-wav.ts

@@ -21,22 +21,19 @@ const writeWavHeader = (
  const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
 
  const headerLength = 44
- const dataLength = length || (4294967295 - 100)
+ const maxDataSize = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
+ const dataLength = length ?? maxDataSize
  const fileSize = dataLength + headerLength
  const header = Buffer.alloc(headerLength)
 
- const RIFF = Buffer.alloc(4, "RIFF")
- const WAVE = Buffer.alloc(4, "WAVE")
- const fmt = Buffer.alloc(4, "fmt ")
- const data = Buffer.alloc(4, "data")
  const byteRate = (sampleRate * channels * bitDepth) / 8
  const blockAlign = (channels * bitDepth) / 8
 
  let offset = 0
- RIFF.copy(header, offset); offset += RIFF.length
+ header.write("RIFF", offset); offset += 4
  header.writeUInt32LE(fileSize - 8, offset); offset += 4
- WAVE.copy(header, offset); offset += WAVE.length
- fmt.copy(header, offset); offset += fmt.length
+ header.write("WAVE", offset); offset += 4
+ header.write("fmt ", offset); offset += 4
  header.writeUInt32LE(16, offset); offset += 4
  header.writeUInt16LE(audioFormat, offset); offset += 2
  header.writeUInt16LE(channels, offset); offset += 2
@@ -44,7 +41,7 @@ const writeWavHeader = (
  header.writeUInt32LE(byteRate, offset); offset += 4
  header.writeUInt16LE(blockAlign, offset); offset += 2
  header.writeUInt16LE(bitDepth, offset); offset += 2
- data.copy(header, offset); offset += data.length
+ header.write("data", offset); offset += 4
  header.writeUInt32LE(dataLength, offset); offset += 4
 
  return header
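
For reference, the derived fields written above work out as follows for one typical stream format (48 kHz, mono, 16-bit PCM; these numbers are illustrative, the actual rate and channel count come from the node configuration):

    const sampleRate = 48000, channels = 1, bitDepth = 16       /* assumed example values */
    const byteRate    = (sampleRate * channels * bitDepth) / 8  /* 96000 bytes per second */
    const blockAlign  = (channels * bitDepth) / 8               /* 2 bytes per sample frame */
    const maxDataSize = Math.pow(2, 32) - 100                   /* 4294967196: placeholder data size
                                                                   used when streaming without a
                                                                   known total length */
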
@@ -52,6 +49,9 @@
 
  /* read WAV header */
  const readWavHeader = (buffer: Buffer) => {
+ if (buffer.length < 44)
+ throw new Error("WAV header too short, expected at least 44 bytes")
+
  let offset = 0
  const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
  const fileSize = buffer.readUInt32LE(offset); offset += 4
@@ -67,6 +67,15 @@ const readWavHeader = (buffer: Buffer) => {
  const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
  const dataLength = buffer.readUInt32LE(offset); offset += 4
 
+ if (riffHead !== "RIFF")
+ throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
+ if (waveHead !== "WAVE")
+ throw new Error(`Invalid WAV file: expected WAVE header, got "${waveHead}"`)
+ if (fmtHead !== "fmt ")
+ throw new Error(`Invalid WAV file: expected "fmt " header, got "${fmtHead}"`)
+ if (data !== "data")
+ throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
+
  return {
  riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
  channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
@@ -103,10 +112,8 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
  decodeStrings: false,
  highWaterMark: 1,
  transform (chunk: SpeechFlowChunk, encoding, callback) {
- if (!Buffer.isBuffer(chunk.payload)) {
+ if (!Buffer.isBuffer(chunk.payload))
  callback(new Error("invalid chunk payload type"))
- return
- }
  else if (firstChunk) {
  if (self.params.mode === "encode") {
  /* convert raw/PCM to WAV/PCM
@@ -164,13 +171,13 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
  callback(new Error(`invalid operation mode "${self.params.mode}"`))
  return
  }
+ firstChunk = false
  }
  else {
  /* pass-through original chunk */
  this.push(chunk)
  callback()
  }
- firstChunk = false
  },
  final (callback) {
  this.push(null)