speechflow 1.4.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. package/CHANGELOG.md +28 -0
  2. package/README.md +220 -7
  3. package/etc/claude.md +70 -0
  4. package/etc/speechflow.yaml +5 -3
  5. package/etc/stx.conf +7 -0
  6. package/package.json +7 -6
  7. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
  8. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
  9. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
  10. package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
  12. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
  13. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
  14. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
  15. package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
  16. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
  17. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
  18. package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
  19. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
  20. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
  21. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
  22. package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
  23. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
  24. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
  25. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
  26. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
  28. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
  29. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
  30. package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
  31. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
  32. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
  33. package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
  35. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -0
  37. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +12 -8
  38. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  39. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
  40. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
  42. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
  43. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
  45. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
  46. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
  47. package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
  48. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
  49. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
  50. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
  51. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  52. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
  53. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  54. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
  55. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
  56. package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
  57. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +15 -13
  58. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  59. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
  60. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
  61. package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
  62. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
  63. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
  64. package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
  65. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
  66. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
  68. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
  70. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
  71. package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
  72. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
  73. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  74. package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
  75. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  76. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
  77. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  78. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
  79. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  80. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
  81. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
  83. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  84. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
  85. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
  86. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
  87. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
  88. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  89. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
  90. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  91. package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
  92. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  93. package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
  94. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
  95. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
  97. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  98. package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
  99. package/speechflow-cli/dst/speechflow-node.js +13 -2
  100. package/speechflow-cli/dst/speechflow-node.js.map +1 -1
  101. package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
  102. package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
  103. package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
  104. package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
  105. package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
  106. package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
  107. package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
  108. package/speechflow-cli/dst/speechflow-utils.js +123 -35
  109. package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
  110. package/speechflow-cli/dst/speechflow.js +69 -14
  111. package/speechflow-cli/dst/speechflow.js.map +1 -1
  112. package/speechflow-cli/etc/oxlint.jsonc +112 -11
  113. package/speechflow-cli/etc/stx.conf +2 -2
  114. package/speechflow-cli/etc/tsconfig.json +1 -1
  115. package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
  116. package/speechflow-cli/package.json +102 -94
  117. package/speechflow-cli/src/lib.d.ts +24 -0
  118. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
  119. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
  120. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
  121. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
  122. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
  123. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
  124. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
  125. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
  126. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +13 -9
  127. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
  128. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
  129. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
  130. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
  131. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
  132. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
  133. package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
  134. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +15 -13
  135. package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
  136. package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
  137. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
  138. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
  139. package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
  140. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
  141. package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
  142. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
  143. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
  144. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
  145. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
  146. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
  147. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
  148. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
  149. package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
  150. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
  151. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
  152. package/speechflow-cli/src/speechflow-node.ts +21 -8
  153. package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
  154. package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
  155. package/speechflow-cli/src/speechflow-utils.ts +125 -32
  156. package/speechflow-cli/src/speechflow.ts +74 -17
  157. package/speechflow-ui-db/dst/index.js +31 -31
  158. package/speechflow-ui-db/etc/eslint.mjs +0 -1
  159. package/speechflow-ui-db/etc/tsc-client.json +3 -3
  160. package/speechflow-ui-db/package.json +11 -10
  161. package/speechflow-ui-db/src/app.vue +20 -6
  162. package/speechflow-ui-st/dst/index.js +26 -26
  163. package/speechflow-ui-st/etc/eslint.mjs +0 -1
  164. package/speechflow-ui-st/etc/tsc-client.json +3 -3
  165. package/speechflow-ui-st/package.json +11 -10
  166. package/speechflow-ui-st/src/app.vue +5 -12
@@ -75,7 +75,7 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
75
75
  "c:a": "pcm_s16le",
76
76
  "ar": this.config.audioSampleRate,
77
77
  "ac": this.config.audioChannels,
78
- "f": "s16le",
78
+ "f": "s16le"
79
79
  } : {}),
80
80
  ...(this.params.dst === "wav" ? {
81
81
  "f": "wav"
@@ -90,7 +90,12 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
90
90
  "f": "opus"
91
91
  } : {})
92
92
  })
93
- this.ffmpeg.run()
93
+ try {
94
+ this.ffmpeg.run()
95
+ }
96
+ catch (err) {
97
+ throw new Error(`failed to start FFmpeg process: ${err}`)
98
+ }
94
99
 
95
100
  /* establish a duplex stream and connect it to FFmpeg */
96
101
  this.stream = Stream.Duplex.from({
@@ -120,7 +125,12 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
120
125
 
121
126
  /* shutdown FFmpeg */
122
127
  if (this.ffmpeg !== null) {
123
- this.ffmpeg.kill()
128
+ try {
129
+ this.ffmpeg.kill()
130
+ }
131
+ catch {
132
+ /* ignore kill errors during cleanup */
133
+ }
124
134
  this.ffmpeg = null
125
135
  }
126
136
  }
@@ -0,0 +1,223 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+ import { EventEmitter } from "node:events"
10
+ import { Duration } from "luxon"
11
+
12
+ /* internal dependencies */
13
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
14
+ import * as utils from "./speechflow-utils"
15
+
16
/*  helper which tracks the already-emitted stream position (in fractional
    samples) and fills timestamp gaps between incoming chunks with
    PCM-I16 silence, trimming any overlap with already-emitted audio  */
class AudioFiller extends EventEmitter {
    private emittedEndSamples = 0          /* stream position in samples already emitted */
    private readonly bytesPerSample = 2    /* PCM I16 */
    private readonly bytesPerFrame: number /* bytes per multi-channel sample frame */
    private readonly sampleTolerance = 0.5 /* tolerance for floating-point sample comparisons */

    constructor (private sampleRate = 48000, private channels = 1) {
        super()
        this.bytesPerFrame = this.channels * this.bytesPerSample
    }

    /* optional helper to allow subscribing with strong typing */
    public on(event: "chunk", listener: (chunk: SpeechFlowChunk) => void): this
    public on(event: string, listener: (...args: any[]) => void): this {
        return super.on(event, listener)
    }

    /* convert duration to fractional samples */
    private samplesFromDuration(duration: Duration): number {
        const seconds = duration.as("seconds")
        const samples = seconds * this.sampleRate
        return samples
    }

    /* convert fractional samples to duration */
    private durationFromSamples(samples: number): Duration {
        const seconds = samples / this.sampleRate
        return Duration.fromObject({ seconds })
    }

    /* emit a chunk of silence covering the sample range [fromSamples, toSamples) */
    private emitSilence (fromSamples: number, toSamples: number) {
        const frames = Math.max(0, Math.floor(toSamples - fromSamples))
        if (frames <= 0)
            return
        const payload = Buffer.alloc(frames * this.bytesPerFrame) /* already zeroed */
        const timestampStart = this.durationFromSamples(fromSamples)
        const timestampEnd = this.durationFromSamples(toSamples)
        const chunk = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
        this.emit("chunk", chunk)
    }

    /* add a chunk of audio for processing: emits leading silence for any
       gap, drops fully-covered chunks, and re-emits the (head-trimmed)
       remainder with normalized timestamps */
    public add (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }): void {
        const startSamp = this.samplesFromDuration(chunk.timestampStart)
        const endSamp = this.samplesFromDuration(chunk.timestampEnd)
        if (endSamp < startSamp)
            throw new Error("invalid timestamps")

        /* if chunk starts beyond what we've emitted, insert silence for the gap */
        if (startSamp > this.emittedEndSamples + this.sampleTolerance) {
            this.emitSilence(this.emittedEndSamples, startSamp)
            this.emittedEndSamples = startSamp
        }

        /* if chunk ends before or at emitted end, we have it fully covered, so drop it */
        if (endSamp <= this.emittedEndSamples + this.sampleTolerance)
            return

        /* trim any overlap at the head */
        const trimHead = Math.max(0, Math.floor(this.emittedEndSamples - startSamp))
        const availableFrames = Math.floor((endSamp - startSamp) - trimHead)
        if (availableFrames <= 0)
            return

        /* determine how many frames the buffer actually has; trust timestamps primarily */
        const bufFrames = Math.floor(chunk.payload.length / this.bytesPerFrame)
        const startFrame = Math.min(trimHead, bufFrames)
        const endFrame = Math.min(startFrame + availableFrames, bufFrames)
        if (endFrame <= startFrame)
            return

        /* determine trimmed/normalized chunk */
        const payload = chunk.payload.subarray(
            startFrame * this.bytesPerFrame,
            endFrame * this.bytesPerFrame)

        /* emit trimmed/normalized chunk */
        const outStartSamples = startSamp + startFrame
        const outEndSamples = outStartSamples + Math.floor(payload.length / this.bytesPerFrame)
        const timestampStart = this.durationFromSamples(outStartSamples)
        const timestampEnd = this.durationFromSamples(outEndSamples)
        const c = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
        this.emit("chunk", c)

        /* advance emitted cursor */
        this.emittedEndSamples = Math.max(this.emittedEndSamples, outEndSamples)
    }
}
105
+
106
+ /* SpeechFlow node for filling audio gaps */
107
+ export default class SpeechFlowNodeFiller extends SpeechFlowNode {
108
+ /* declare official node name */
109
+ public static name = "filler"
110
+
111
+ /* internal state */
112
+ private destroyed = false
113
+ private filler: AudioFiller | null = null
114
+ private sendQueue: utils.AsyncQueue<SpeechFlowChunk | null> | null = null
115
+
116
+ /* construct node */
117
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
118
+ super(id, cfg, opts, args)
119
+
120
+ /* declare node configuration parameters */
121
+ this.configure({
122
+ segment: { type: "number", val: 50, pos: 0, match: (n: number) => n >= 10 && n <= 1000 }
123
+ })
124
+
125
+ /* declare node input/output format */
126
+ this.input = "audio"
127
+ this.output = "audio"
128
+ }
129
+
130
+ /* open node */
131
+ async open () {
132
+ /* clear destruction flag */
133
+ this.destroyed = false
134
+
135
+ /* establish queues */
136
+ this.filler = new AudioFiller(this.config.audioSampleRate, this.config.audioChannels)
137
+ this.sendQueue = new utils.AsyncQueue<SpeechFlowChunk | null>()
138
+
139
+ /* shift chunks from filler to send queue */
140
+ this.filler.on("chunk", (chunk) => {
141
+ this.sendQueue?.write(chunk)
142
+ })
143
+
144
+ /* establish a duplex stream */
145
+ const self = this
146
+ this.stream = new Stream.Duplex({
147
+ readableObjectMode: true,
148
+ writableObjectMode: true,
149
+ decodeStrings: false,
150
+ write (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }, encoding, callback) {
151
+ if (self.destroyed || self.filler === null)
152
+ callback(new Error("stream already destroyed"))
153
+ else if (!Buffer.isBuffer(chunk.payload))
154
+ callback(new Error("invalid chunk payload type"))
155
+ else {
156
+ try {
157
+ self.filler.add(chunk)
158
+ callback()
159
+ }
160
+ catch (error: any) {
161
+ callback(error)
162
+ }
163
+ }
164
+ },
165
+ read (size) {
166
+ if (self.destroyed || self.sendQueue === null) {
167
+ this.push(null)
168
+ return
169
+ }
170
+ self.sendQueue.read().then((chunk) => {
171
+ if (self.destroyed) {
172
+ this.push(null)
173
+ return
174
+ }
175
+ if (chunk === null) {
176
+ self.log("info", "received EOF signal")
177
+ this.push(null)
178
+ }
179
+ else {
180
+ self.log("debug", `received data (${chunk.payload.length} bytes)`)
181
+ this.push(chunk)
182
+ }
183
+ }).catch((error) => {
184
+ if (!self.destroyed)
185
+ self.log("error", `queue read error: ${error.message}`)
186
+ })
187
+ },
188
+ final (callback) {
189
+ if (self.destroyed) {
190
+ callback()
191
+ return
192
+ }
193
+ this.push(null)
194
+ callback()
195
+ }
196
+ })
197
+ }
198
+
199
+ /* close node */
200
+ async close () {
201
+ /* indicate destruction */
202
+ this.destroyed = true
203
+
204
+ /* destroy queues */
205
+ if (this.sendQueue !== null) {
206
+ this.sendQueue.destroy()
207
+ this.sendQueue = null
208
+ }
209
+
210
+ /* destroy filler */
211
+ if (this.filler !== null) {
212
+ this.filler.removeAllListeners()
213
+ this.filler = null
214
+ }
215
+
216
+ /* close stream */
217
+ if (this.stream !== null) {
218
+ this.stream.destroy()
219
+ this.stream = null
220
+ }
221
+ }
222
+ }
223
+
@@ -0,0 +1,98 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* internal dependencies */
11
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
12
+ import * as utils from "./speechflow-utils"
13
+
14
+ /* SpeechFlow node for gain adjustment in audio-to-audio passing */
15
+ export default class SpeechFlowNodeGain extends SpeechFlowNode {
16
+ /* declare official node name */
17
+ public static name = "gain"
18
+
19
+ /* internal state */
20
+ private destroyed = false
21
+
22
+ /* construct node */
23
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
24
+ super(id, cfg, opts, args)
25
+
26
+ /* declare node configuration parameters */
27
+ this.configure({
28
+ db: { type: "number", val: 0, pos: 0, match: (n: number) => n >= -60 && n <= 60 }
29
+ })
30
+
31
+ /* declare node input/output format */
32
+ this.input = "audio"
33
+ this.output = "audio"
34
+ }
35
+
36
+ /* open node */
37
+ async open () {
38
+ /* clear destruction flag */
39
+ this.destroyed = false
40
+
41
+ /* adjust gain */
42
+ const adjustGain = (chunk: SpeechFlowChunk & { payload: Buffer }, db: number) => {
43
+ const dv = new DataView(chunk.payload.buffer, chunk.payload.byteOffset, chunk.payload.byteLength)
44
+ const gainFactor = utils.dB2lin(db)
45
+ for (let i = 0; i < dv.byteLength; i += 2) {
46
+ let sample = dv.getInt16(i, true)
47
+ sample *= gainFactor
48
+ sample = Math.max(Math.min(sample, 32767), -32768)
49
+ dv.setInt16(i, sample, true)
50
+ }
51
+ }
52
+
53
+ /* establish a transform stream */
54
+ const self = this
55
+ this.stream = new Stream.Transform({
56
+ readableObjectMode: true,
57
+ writableObjectMode: true,
58
+ decodeStrings: false,
59
+ transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
60
+ if (self.destroyed) {
61
+ callback(new Error("stream already destroyed"))
62
+ return
63
+ }
64
+ if (!Buffer.isBuffer(chunk.payload))
65
+ callback(new Error("invalid chunk payload type"))
66
+ else if (chunk.payload.byteLength % 2 !== 0)
67
+ callback(new Error("invalid audio buffer size (not 16-bit aligned)"))
68
+ else {
69
+ /* adjust chunk */
70
+ adjustGain(chunk, self.params.db)
71
+ this.push(chunk)
72
+ callback()
73
+ }
74
+ },
75
+ final (callback) {
76
+ if (self.destroyed) {
77
+ callback()
78
+ return
79
+ }
80
+ this.push(null)
81
+ callback()
82
+ }
83
+ })
84
+ }
85
+
86
+ /* close node */
87
+ async close () {
88
+ /* indicate destruction */
89
+ this.destroyed = true
90
+
91
+ /* close stream */
92
+ if (this.stream !== null) {
93
+ this.stream.destroy()
94
+ this.stream = null
95
+ }
96
+ }
97
+ }
98
+
@@ -21,7 +21,7 @@ type AudioQueueElement = {
21
21
  type: "audio-frame",
22
22
  chunk: SpeechFlowChunk,
23
23
  data: Float32Array,
24
- gender?: "male" | "female"
24
+ gender?: "male" | "female" | "unknown"
25
25
  } | {
26
26
  type: "audio-eof"
27
27
  }
@@ -32,7 +32,6 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
32
32
  public static name = "gender"
33
33
 
34
34
  /* internal state */
35
- private static speexInitialized = false
36
35
  private classifier: Transformers.AudioClassificationPipeline | null = null
37
36
  private queue = new utils.Queue<AudioQueueElement>()
38
37
  private queueRecv = this.queue.pointerUse("recv")
@@ -66,7 +65,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
66
65
  this.shutdown = false
67
66
 
68
67
  /* pass-through logging */
69
- const log = (level: string, msg: string) => { this.log(level, msg) }
68
+ const log = this.log.bind(this)
70
69
 
71
70
  /* the used model */
72
71
  const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
@@ -81,7 +80,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
81
80
  artifact += `:${progress.file}`
82
81
  let percent = 0
83
82
  if (typeof progress.loaded === "number" && typeof progress.total === "number")
84
- percent = (progress.loaded as number / progress.total as number) * 100
83
+ percent = (progress.loaded / progress.total) * 100
85
84
  else if (typeof progress.progress === "number")
86
85
  percent = progress.progress
87
86
  if (percent > 0)
@@ -92,7 +91,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
92
91
  return
93
92
  for (const [ artifact, percent ] of progressState) {
94
93
  this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
95
- if (percent >= 1.0)
94
+ if (percent >= 100.0)
96
95
  progressState.delete(artifact)
97
96
  }
98
97
  }, 1000)
@@ -103,11 +102,17 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
103
102
  device: "auto",
104
103
  progress_callback: progressCallback
105
104
  })
106
- const timeoutPromise = new Promise((resolve, reject) => setTimeout(() =>
107
- reject(new Error("model initialization timeout")), 30 * 1000))
105
+ let timeoutId: ReturnType<typeof setTimeout> | null = null
106
+ const timeoutPromise = new Promise((resolve, reject) => {
107
+ timeoutId = setTimeout(() =>
108
+ reject(new Error("model initialization timeout")), 30 * 1000)
109
+ })
108
110
  this.classifier = await Promise.race([
109
111
  pipelinePromise, timeoutPromise
110
- ]) as Transformers.AudioClassificationPipeline
112
+ ]).finally(() => {
113
+ if (timeoutId !== null)
114
+ clearTimeout(timeoutId)
115
+ }) as Transformers.AudioClassificationPipeline
111
116
  }
112
117
  catch (error) {
113
118
  if (this.progressInterval) {
@@ -128,10 +133,15 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
128
133
  if (this.shutdown || this.classifier === null)
129
134
  throw new Error("classifier shutdown during operation")
130
135
  const classifyPromise = this.classifier(data)
131
- const timeoutPromise = new Promise((resolve, reject) => setTimeout(() =>
132
- reject(new Error("classification timeout")), 30 * 1000))
133
- const result = await Promise.race([ classifyPromise, timeoutPromise ]) as
134
- Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
136
+ let timeoutId: ReturnType<typeof setTimeout> | null = null
137
+ const timeoutPromise = new Promise((resolve, reject) => {
138
+ timeoutId = setTimeout(() =>
139
+ reject(new Error("classification timeout")), 30 * 1000)
140
+ })
141
+ const result = await Promise.race([ classifyPromise, timeoutPromise ]).finally(() => {
142
+ if (timeoutId !== null)
143
+ clearTimeout(timeoutId)
144
+ }) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
135
145
  const classified = Array.isArray(result) ?
136
146
  result as Transformers.AudioClassificationOutput :
137
147
  [ result ]
@@ -139,15 +149,20 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
139
149
  const c2 = classified.find((c: any) => c.label === "female")
140
150
  const male = c1 ? c1.score : 0.0
141
151
  const female = c2 ? c2.score : 0.0
142
- return (male > female ? "male" : "female")
152
+ if (male > female)
153
+ return "male"
154
+ else if (male < female)
155
+ return "female"
156
+ else
157
+ return "unknown"
143
158
  }
144
159
 
145
160
  /* define sample rate required by model */
146
161
  const sampleRateTarget = 16000
147
162
 
148
163
  /* work off queued audio frames */
149
- const frameWindowDuration = 0.5
150
- const frameWindowSamples = frameWindowDuration * sampleRateTarget
164
+ const frameWindowDuration = this.params.window / 1000
165
+ const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
151
166
  let lastGender = ""
152
167
  let workingOff = false
153
168
  const workOffQueue = async () => {
@@ -236,8 +251,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
236
251
  const wav = new WaveFile()
237
252
  wav.fromScratch(self.config.audioChannels, self.config.audioSampleRate, "32f", data)
238
253
  wav.toSampleRate(sampleRateTarget, { method: "cubic" })
239
- data = wav.getSamples(false, Float32Array<ArrayBuffer>) as
240
- any as Float32Array<ArrayBuffer>
254
+ data = wav.getSamples(false, Float32Array) as any as Float32Array<ArrayBuffer>
241
255
 
242
256
  /* queue chunk and converted data */
243
257
  self.queueRecv.append({ type: "audio-frame", chunk, data })
@@ -22,6 +22,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
22
22
  /* internal state */
23
23
  private emitInterval: ReturnType<typeof setInterval> | null = null
24
24
  private calcInterval: ReturnType<typeof setInterval> | null = null
25
+ private silenceTimer: ReturnType<typeof setTimeout> | null = null
25
26
  private chunkBuffer = new Float32Array(0)
26
27
  private destroyed = false
27
28
 
@@ -63,7 +64,6 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
63
64
  this.chunkBuffer = new Float32Array(0)
64
65
 
65
66
  /* define chunk processing function */
66
- let timer: ReturnType<typeof setTimeout> | null = null
67
67
  const processChunk = (chunkData: Float32Array) => {
68
68
  /* update internal audio sample sliding window */
69
69
  const newWindow = new Float32Array(sampleWindowSize)
@@ -86,11 +86,11 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
86
86
  calculateLoudnessRange: false,
87
87
  calculateTruePeak: false
88
88
  })
89
- lufss = lufs.shortTerm ? lufs.shortTerm[0] : 0
89
+ lufss = lufs.shortTerm ? lufs.shortTerm[0] : -60
90
90
  rms = getRMS(audioData, { asDB: true })
91
- if (timer !== null)
92
- clearTimeout(timer)
93
- timer = setTimeout(() => {
91
+ if (this.silenceTimer !== null)
92
+ clearTimeout(this.silenceTimer)
93
+ this.silenceTimer = setTimeout(() => {
94
94
  lufss = -60
95
95
  rms = -60
96
96
  }, 500)
@@ -117,7 +117,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
117
117
  this.sendResponse([ "meter", "LUFS-S", lufss ])
118
118
  this.sendResponse([ "meter", "RMS", rms ])
119
119
  if (this.params.dashboard !== "")
120
- this.dashboardInfo("audio", this.params.dashboard, "final", lufss)
120
+ this.sendDashboard("audio", this.params.dashboard, "final", lufss)
121
121
  }, this.params.interval)
122
122
 
123
123
  /* provide Duplex stream and internally attach to meter */
@@ -172,9 +172,6 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
172
172
 
173
173
  /* close node */
174
174
  async close () {
175
- /* indicate destruction */
176
- this.destroyed = true
177
-
178
175
  /* stop intervals */
179
176
  if (this.emitInterval !== null) {
180
177
  clearInterval(this.emitInterval)
@@ -184,11 +181,18 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
184
181
  clearInterval(this.calcInterval)
185
182
  this.calcInterval = null
186
183
  }
184
+ if (this.silenceTimer !== null) {
185
+ clearTimeout(this.silenceTimer)
186
+ this.silenceTimer = null
187
+ }
187
188
 
188
189
  /* close stream */
189
190
  if (this.stream !== null) {
190
191
  this.stream.destroy()
191
192
  this.stream = null
192
193
  }
194
+
195
+ /* indicate destruction */
196
+ this.destroyed = true
193
197
  }
194
198
  }
@@ -43,9 +43,10 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
43
43
  throw new Error("mute: node already destroyed")
44
44
  try {
45
45
  if (params.length === 2 && params[0] === "mode") {
46
- if (!params[1].match(/^(?:none|silenced|unplugged)$/))
46
+ if (typeof params[1] !== "string" ||
47
+ !params[1].match(/^(?:none|silenced|unplugged)$/))
47
48
  throw new Error("mute: invalid mode argument in external request")
48
- const muteMode: MuteMode = params[1] as MuteMode
49
+ const muteMode = params[1] as MuteMode
49
50
  this.setMuteMode(muteMode)
50
51
  this.sendResponse([ "mute", "mode", muteMode ])
51
52
  }
@@ -0,0 +1,62 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* internal dependencies */
8
+ import { parentPort } from "node:worker_threads"
9
+
10
+ /* external dependencies */
11
+ import { type DenoiseState, Rnnoise } from "@shiguredo/rnnoise-wasm"
12
+
13
+ /* WASM state */
14
+ let rnnoise: Rnnoise
15
+ let denoiseState: DenoiseState
16
+
17
+ /* global initialization */
18
+ ;(async () => {
19
+ try {
20
+ rnnoise = await Rnnoise.load()
21
+ denoiseState = rnnoise.createDenoiseState()
22
+ parentPort!.postMessage({ type: "ready" })
23
+ }
24
+ catch (err) {
25
+ parentPort!.postMessage({ type: "failed", message: `failed to initialize RNNoise: ${err}` })
26
+ process.exit(1)
27
+ }
28
+ })()
29
+
30
+ /* receive messages */
31
+ parentPort!.on("message", (msg) => {
32
+ if (msg.type === "process") {
33
+ /* process a single audio frame */
34
+ const { id, data } = msg
35
+
36
+ /* convert regular Int16Array [-32768,32768]
37
+ to unusual non-normalized Float32Array [-32768,32768]
38
+ as required by RNNoise */
39
+ const f32a = new Float32Array(data.length)
40
+ for (let i = 0; i < data.length; i++)
41
+ f32a[i] = data[i]
42
+
43
+ /* process frame with RNNoise WASM */
44
+ denoiseState.processFrame(f32a)
45
+
46
+ /* convert back Float32Array to Int16Array */
47
+ const i16 = new Int16Array(data.length)
48
+ for (let i = 0; i < data.length; i++)
49
+ i16[i] = Math.round(f32a[i])
50
+
51
+ parentPort!.postMessage({ type: "process-done", id, data: i16 }, [ i16.buffer ])
52
+ }
53
+ else if (msg.type === "close") {
54
+ /* shutdown this process */
55
+ try {
56
+ denoiseState.destroy()
57
+ }
58
+ finally {
59
+ process.exit(0)
60
+ }
61
+ }
62
+ })