speechflow 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +165 -22
- package/dst/speechflow-node-a2a-gender.d.ts +2 -0
- package/dst/speechflow-node-a2a-gender.js +137 -59
- package/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/dst/speechflow-node-a2a-meter.d.ts +3 -1
- package/dst/speechflow-node-a2a-meter.js +79 -35
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2a-mute.d.ts +1 -0
- package/dst/speechflow-node-a2a-mute.js +37 -11
- package/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/dst/speechflow-node-a2a-vad.d.ts +3 -0
- package/dst/speechflow-node-a2a-vad.js +194 -96
- package/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/dst/speechflow-node-a2a-wav.js +27 -11
- package/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +4 -0
- package/dst/speechflow-node-a2t-deepgram.js +141 -43
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +2 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +61 -12
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +1 -0
- package/dst/speechflow-node-t2a-kokoro.js +10 -4
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/dst/speechflow-node-t2t-deepl.js +8 -4
- package/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/dst/speechflow-node-t2t-format.js +2 -2
- package/dst/speechflow-node-t2t-format.js.map +1 -1
- package/dst/speechflow-node-t2t-ollama.js +1 -1
- package/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/dst/speechflow-node-t2t-openai.js +1 -1
- package/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +1 -1
- package/dst/speechflow-node-t2t-sentence.js +35 -24
- package/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/dst/speechflow-node-t2t-subtitle.js +85 -17
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-t2t-transformers.js +2 -2
- package/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/dst/speechflow-node-x2x-filter.js +4 -4
- package/dst/speechflow-node-x2x-trace.js +1 -1
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +12 -8
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node-xio-file.js +9 -3
- package/dst/speechflow-node-xio-file.js.map +1 -1
- package/dst/speechflow-node-xio-mqtt.js +5 -2
- package/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/dst/speechflow-node-xio-websocket.js +11 -11
- package/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/dst/speechflow-utils.d.ts +5 -0
- package/dst/speechflow-utils.js +77 -44
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +104 -34
- package/dst/speechflow.js.map +1 -1
- package/etc/eslint.mjs +1 -2
- package/etc/speechflow.yaml +18 -7
- package/etc/stx.conf +3 -3
- package/package.json +14 -13
- package/src/speechflow-node-a2a-gender.ts +148 -64
- package/src/speechflow-node-a2a-meter.ts +87 -40
- package/src/speechflow-node-a2a-mute.ts +39 -11
- package/src/speechflow-node-a2a-vad.ts +206 -100
- package/src/speechflow-node-a2a-wav.ts +27 -11
- package/src/speechflow-node-a2t-deepgram.ts +148 -45
- package/src/speechflow-node-t2a-elevenlabs.ts +65 -12
- package/src/speechflow-node-t2a-kokoro.ts +11 -4
- package/src/speechflow-node-t2t-deepl.ts +9 -4
- package/src/speechflow-node-t2t-format.ts +2 -2
- package/src/speechflow-node-t2t-ollama.ts +1 -1
- package/src/speechflow-node-t2t-openai.ts +1 -1
- package/src/speechflow-node-t2t-sentence.ts +38 -27
- package/src/speechflow-node-t2t-subtitle.ts +62 -15
- package/src/speechflow-node-t2t-transformers.ts +4 -3
- package/src/speechflow-node-x2x-filter.ts +4 -4
- package/src/speechflow-node-x2x-trace.ts +1 -1
- package/src/speechflow-node-xio-device.ts +12 -8
- package/src/speechflow-node-xio-file.ts +9 -3
- package/src/speechflow-node-xio-mqtt.ts +5 -2
- package/src/speechflow-node-xio-websocket.ts +12 -12
- package/src/speechflow-utils.ts +78 -44
- package/src/speechflow.ts +117 -36
package/package.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "name": "speechflow",
-  "version": "1.3.0",
-  "x-stdver": "1.3.…
-  "x-release": "2025-…
+  "version": "1.3.2",
+  "x-stdver": "1.3.2-GA",
+  "x-release": "2025-08-04",
   "homepage": "https://github.com/rse/speechflow",
   "description": "Speech Processing Flow Graph",
   "license": "GPL-3.0-only",
@@ -18,7 +18,7 @@
   "dependencies": {
     "cli-io": "0.9.13",
     "yargs": "18.0.0",
-    "flowlink": "0.…
+    "flowlink": "1.0.0",
     "js-yaml": "4.1.0",
     "@gpeng/naudiodon": "2.4.1",
     "@deepgram/sdk": "4.11.1",
@@ -26,7 +26,7 @@
     "@elevenlabs/elevenlabs-js": "2.7.0",
     "stream-transform": "3.4.0",
     "get-stream": "9.0.1",
-    "@dotenvx/dotenvx": "1.48.…
+    "@dotenvx/dotenvx": "1.48.4",
     "speex-resampler": "3.0.1",
     "object-path": "0.11.8",
     "ws": "8.18.3",
@@ -34,23 +34,24 @@
     "utf-8-validate": "6.0.5",
     "@hapi/hapi": "21.4.0",
     "@hapi/boom": "10.0.1",
+    "@hapi/inert": "7.1.0",
     "hapi-plugin-header": "1.1.8",
     "hapi-plugin-websocket": "2.4.11",
     "@opensumi/reconnecting-websocket": "4.4.0",
     "ollama": "0.5.16",
-    "openai": "5.…
+    "openai": "5.11.0",
     "@rse/ffmpeg": "1.4.2",
     "ffmpeg-stream": "1.0.1",
     "installed-packages": "1.0.13",
     "syspath": "1.0.8",
     "wav": "1.0.2",
-    "mqtt": "5.…
+    "mqtt": "5.14.0",
     "cbor2": "2.0.1",
     "arktype": "2.1.20",
     "pure-uuid": "1.8.1",
     "wavefile": "11.0.0",
     "audio-inspect": "0.0.3",
-    "@huggingface/transformers": "3.7.…
+    "@huggingface/transformers": "3.7.1",
     "kokoro-js": "1.2.1",
     "@ericedouard/vad-node-realtime": "0.2.0",
     "luxon": "3.7.1",
@@ -67,10 +68,10 @@
     "eslint-plugin-node": "11.1.0",
     "@typescript-eslint/eslint-plugin": "8.38.0",
     "@typescript-eslint/parser": "8.38.0",
-    "oxlint": "1.…
-    "eslint-plugin-oxlint": "1.…
+    "oxlint": "1.9.0",
+    "eslint-plugin-oxlint": "1.9.0",
     "@biomejs/biome": "2.0.6",
-    "eslint-config-biome": "1.…
+    "eslint-config-biome": "2.1.3",

     "@types/node": "24.1.0",
     "@types/yargs": "17.0.33",
@@ -79,7 +80,7 @@
     "@types/ws": "8.18.1",
     "@types/resolve": "1.20.6",
     "@types/wav": "1.0.4",
-    "@types/luxon": "3.…
+    "@types/luxon": "3.7.1",
     "@types/wrap-text": "1.0.2",

     "patch-package": "8.0.0",
@@ -95,7 +96,7 @@
   "overrides": {
     "@huggingface/transformers": { "onnxruntime-node": "1.23.0-dev.20250703-7fc6235861" }
   },
-  "upd": [ "!@biomejs/biome" ],
+  "upd": [ "!@biomejs/biome", "!typescript" ],
   "engines": {
     "node": ">=22.0.0"
   },

package/src/speechflow-node-a2a-gender.ts
CHANGED
@@ -39,6 +39,8 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
     private queueAC = this.queue.pointerUse("ac")
     private queueSend = this.queue.pointerUse("send")
     private shutdown = false
+    private workingOffTimer: ReturnType<typeof setTimeout> | null = null
+    private progressInterval: ReturnType<typeof setInterval> | null = null

     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -60,6 +62,9 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
             throw new Error("Gender node currently supports PCM-S16LE audio only")

+        /* clear shutdown flag */
+        this.shutdown = false
+
         /* pass-through logging */
         const log = (level: string, msg: string) => { this.log(level, msg) }

@@ -69,6 +74,8 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
         /* track download progress when instantiating Transformers engine and model */
         const progressState = new Map<string, number>()
         const progressCallback: Transformers.ProgressCallback = (progress: any) => {
+            if (this.shutdown)
+                return
             let artifact = model
             if (typeof progress.file === "string")
                 artifact += `:${progress.file}`
@@ -80,31 +87,54 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
             if (percent > 0)
                 progressState.set(artifact, percent)
         }
-        …
+        this.progressInterval = setInterval(() => {
+            if (this.shutdown)
+                return
             for (const [ artifact, percent ] of progressState) {
                 this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
                 if (percent >= 1.0)
                     progressState.delete(artifact)
             }
         }, 1000)
-        …
+        try {
+            const pipelinePromise = Transformers.pipeline("audio-classification", model, {
+                cache_dir: path.join(this.config.cacheDir, "gender"),
+                dtype: "q4",
+                device: "auto",
+                progress_callback: progressCallback
+            })
+            const timeoutPromise = new Promise((resolve, reject) => setTimeout(() =>
+                reject(new Error("model initialization timeout")), 30 * 1000))
+            this.classifier = await Promise.race([
+                pipelinePromise, timeoutPromise
+            ]) as Transformers.AudioClassificationPipeline
+        }
+        catch (error) {
+            if (this.progressInterval) {
+                clearInterval(this.progressInterval)
+                this.progressInterval = null
+            }
+            throw new Error(`failed to initialize classifier pipeline: ${error}`)
+        }
+        if (this.progressInterval) {
+            clearInterval(this.progressInterval)
+            this.progressInterval = null
+        }
         if (this.classifier === null)
             throw new Error("failed to instantiate classifier pipeline")

         /* classify a single large-enough concatenated audio frame */
         const classify = async (data: Float32Array) => {
-            …
+            if (this.shutdown || this.classifier === null)
+                throw new Error("classifier shutdown during operation")
+            const classifyPromise = this.classifier(data)
+            const timeoutPromise = new Promise((resolve, reject) => setTimeout(() =>
+                reject(new Error("classification timeout")), 30 * 1000))
+            const result = await Promise.race([ classifyPromise, timeoutPromise ]) as
+                Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
+            const classified = Array.isArray(result) ?
+                result as Transformers.AudioClassificationOutput :
+                [ result ]
             const c1 = classified.find((c: any) => c.label === "male")
             const c2 = classified.find((c: any) => c.label === "female")
             const male = c1 ? c1.score : 0.0
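Both `Promise.race` calls above (model initialization and per-frame classification) use the same timeout-race idiom. A minimal standalone sketch of it in TypeScript (the `withTimeout` helper name is ours, not the package's), including the timer cleanup the idiom needs so a settled race does not leave a live timer behind:

```ts
// Sketch of the timeout-race pattern: reject if `promise` does not settle
// within `ms` milliseconds, and clear the timer once the race settles.
function withTimeout<T> (promise: Promise<T>, ms: number, what: string): Promise<T> {
    let timer!: ReturnType<typeof setTimeout>
    const timeout = new Promise<never>((_resolve, reject) => {
        timer = setTimeout(() => reject(new Error(`${what} timeout`)), ms)
    })
    return Promise.race([ promise, timeout ]).finally(() => clearTimeout(timer))
}

/* usage, analogous to the model initialization above:
   const classifier = await withTimeout(pipelinePromise, 30 * 1000, "model initialization") */
```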
@@ -119,57 +149,65 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
         const frameWindowDuration = 0.5
         const frameWindowSamples = frameWindowDuration * sampleRateTarget
         let lastGender = ""
-        let workingOffTimer: ReturnType<typeof setTimeout> | null = null
         let workingOff = false
         const workOffQueue = async () => {
             /* control working off round */
             if (workingOff || this.shutdown)
                 return
             workingOff = true
-            if (workingOffTimer !== null) {
-                clearTimeout(workingOffTimer)
-                workingOffTimer = null
+            if (this.workingOffTimer !== null) {
+                clearTimeout(this.workingOffTimer)
+                this.workingOffTimer = null
             }
             this.queue.off("write", workOffQueue)

-            …
-                        break
-                if ((samples + element.data.length) < frameWindowSamples) {
-                    data.set(element.data, samples)
-                    samples += element.data.length
-                }
-                pos++
-            }
-            if (pos0 < pos && samples > frameWindowSamples * 0.75) {
-                const gender = await classify(data)
-                const posM = pos0 + Math.trunc((pos - pos0) * 0.25)
-                while (pos0 < posM && pos0 < posL) {
-                    const element = this.queueAC.peek(pos0)
+            try {
+                let pos0 = this.queueAC.position()
+                const posL = this.queueAC.maxPosition()
+                const data = new Float32Array(frameWindowSamples)
+                data.fill(0)
+                let samples = 0
+                let pos = pos0
+                while (pos < posL && samples < frameWindowSamples && !this.shutdown) {
+                    const element = this.queueAC.peek(pos)
                     if (element === undefined || element.type !== "audio-frame")
                         break
-                    element.…
+                    if ((samples + element.data.length) < frameWindowSamples) {
+                        data.set(element.data, samples)
+                        samples += element.data.length
+                    }
+                    pos++
                 }
-            if (…
+                if (pos0 < pos && samples > frameWindowSamples * 0.75 && !this.shutdown) {
+                    const gender = await classify(data)
+                    if (this.shutdown)
+                        return
+                    const posM = pos0 + Math.trunc((pos - pos0) * 0.25)
+                    while (pos0 < posM && pos0 < posL && !this.shutdown) {
+                        const element = this.queueAC.peek(pos0)
+                        if (element === undefined || element.type !== "audio-frame")
+                            break
+                        element.gender = gender
+                        this.queueAC.touch()
+                        this.queueAC.walk(+1)
+                        pos0++
+                    }
+                    if (lastGender !== gender && !this.shutdown) {
+                        log("info", `gender now recognized as <${gender}>`)
+                        lastGender = gender
+                    }
                 }
             }
+            catch (error) {
+                log("error", `gender classification error: ${error}`)
+            }

             /* re-initiate working off round */
             workingOff = false
-            …
+            if (!this.shutdown) {
+                this.workingOffTimer = setTimeout(workOffQueue, 100)
+                this.queue.once("write", workOffQueue)
+            }
         }
         this.queue.once("write", workOffQueue)

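The reworked `workOffQueue` is a drain-loop idiom: at most one round runs at a time, and after each round the worker re-arms itself via a one-shot `write` listener for immediate wake-up plus a fallback timer as a periodic poll. A stripped-down sketch of that re-arm pattern over a plain Node.js `EventEmitter`; `queue` and the drain body are illustrative stand-ins, not the SpeechFlow queue API:

```ts
import { EventEmitter } from "node:events"

const queue = new EventEmitter()          /* stand-in for the node's queue    */
let shutdown   = false
let workingOff = false
let timer: ReturnType<typeof setTimeout> | null = null

const workOff = async () => {
    if (workingOff || shutdown)           /* at most one round in flight      */
        return
    workingOff = true
    if (timer !== null) {                 /* cancel the pending fallback poll */
        clearTimeout(timer)
        timer = null
    }
    queue.off("write", workOff)           /* avoid double-arming while busy   */
    try {
        /* ... drain currently queued elements here ... */
    }
    catch (error) {
        console.error(`work-off error: ${error}`)
    }
    workingOff = false
    if (!shutdown) {
        timer = setTimeout(workOff, 100)  /* fallback poll                    */
        queue.once("write", workOff)      /* immediate wake-up on new writes  */
    }
}
queue.once("write", workOff)
```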
@@ -183,28 +221,41 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode

         /* receive audio chunk (writable side of stream) */
         write (chunk: SpeechFlowChunk, encoding, callback) {
+            if (self.shutdown) {
+                callback(new Error("stream already destroyed"))
+                return
+            }
             if (!Buffer.isBuffer(chunk.payload))
                 callback(new Error("expected audio input as Buffer chunks"))
             else if (chunk.payload.byteLength === 0)
                 callback()
             else {
-                …
-                /* queue chunk and converted data */
-                self.queueRecv.append({ type: "audio-frame", chunk, data })
+                try {
+                    /* convert audio samples from PCM/I16/48KHz to PCM/F32/16KHz */
+                    let data = utils.convertBufToF32(chunk.payload, self.config.audioLittleEndian)
+                    const wav = new WaveFile()
+                    wav.fromScratch(self.config.audioChannels, self.config.audioSampleRate, "32f", data)
+                    wav.toSampleRate(sampleRateTarget, { method: "cubic" })
+                    data = wav.getSamples(false, Float32Array<ArrayBuffer>) as
+                        any as Float32Array<ArrayBuffer>

-                …
+                    /* queue chunk and converted data */
+                    self.queueRecv.append({ type: "audio-frame", chunk, data })
+                    callback()
+                }
+                catch (error) {
+                    callback(error instanceof Error ? error : new Error("audio processing failed"))
+                }
             }
         },

         /* receive no more audio chunks (writable side of stream) */
         final (callback) {
+            if (self.shutdown) {
+                callback()
+                return
+            }
+
             /* signal end of file */
             self.queueRecv.append({ type: "audio-eof" })
             callback()
@@ -214,8 +265,10 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
         read (_size) {
             /* flush pending audio chunks */
             const flushPendingChunks = () => {
-                if (self.shutdown)
+                if (self.shutdown) {
+                    this.push(null)
                     return
+                }
                 const element = self.queueSend.peek()
                 if (element !== undefined
                     && element.type === "audio-eof")
@@ -224,6 +277,10 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
                     && element.type === "audio-frame"
                     && element.gender !== undefined) {
                     while (true) {
+                        if (self.shutdown) {
+                            this.push(null)
+                            return
+                        }
                         const element = self.queueSend.peek()
                         if (element === undefined)
                             break
@@ -242,7 +299,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
                         self.queue.trim()
                     }
                 }
-                else
+                else if (!self.shutdown)
                     self.queue.once("write", flushPendingChunks)
             }
             flushPendingChunks()
@@ -255,16 +312,43 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
         /* indicate shutdown */
         this.shutdown = true

+        /* cleanup working-off timer */
+        if (this.workingOffTimer !== null) {
+            clearTimeout(this.workingOffTimer)
+            this.workingOffTimer = null
+        }
+
+        /* cleanup progress interval */
+        if (this.progressInterval !== null) {
+            clearInterval(this.progressInterval)
+            this.progressInterval = null
+        }
+
+        /* remove all event listeners */
+        this.queue.removeAllListeners("write")
+
         /* close stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null
         }

-        /* …
+        /* cleanup classifier */
         if (this.classifier !== null) {
-            …
+            try {
+                const disposePromise = this.classifier.dispose()
+                const timeoutPromise = new Promise((resolve) => setTimeout(resolve, 5000))
+                await Promise.race([ disposePromise, timeoutPromise ])
+            }
+            catch (error) {
+                this.log("warning", `error during classifier cleanup: ${error}`)
+            }
             this.classifier = null
         }
+
+        /* cleanup queue pointers */
+        this.queue.pointerDelete("recv")
+        this.queue.pointerDelete("ac")
+        this.queue.pointerDelete("send")
     }
-}
+}
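The new `close()` bounds classifier disposal with a timeout as well, but one that resolves rather than rejects, so a slow `dispose()` degrades into a logged warning instead of a hung or failed shutdown. A sketch of that variant, assuming only a promise-returning `dispose()`:

```ts
// Sketch: bound a cleanup call to `ms` milliseconds. The timeout branch
// resolves (it does not reject), so shutdown can never hang or throw here.
async function disposeBounded (dispose: () => Promise<unknown>, ms = 5000): Promise<void> {
    try {
        await Promise.race([
            dispose(),
            new Promise<void>((resolve) => setTimeout(resolve, ms))
        ])
    }
    catch (error) {
        console.warn(`error during cleanup: ${error}`)
    }
}
```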
package/src/speechflow-node-a2a-meter.ts
CHANGED
@@ -20,7 +20,9 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode
     public static name = "meter"

     /* internal state */
-    interval: ReturnType<typeof setInterval> | null = null
+    private interval: ReturnType<typeof setInterval> | null = null
+    private destroyed = false
+    private pendingCalculations = new Set<ReturnType<typeof setTimeout>>()

     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -42,6 +44,9 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode
         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
             throw new Error("meter node currently supports PCM-S16LE audio only")

+        /* clear destruction flag */
+        this.destroyed = false
+
         /* internal state */
         const sampleWindowDuration = 3 /* LUFS-S requires 3s */
         const sampleWindowSize = this.config.audioSampleRate * sampleWindowDuration
@@ -52,12 +57,14 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode

         /* setup loudness emitting interval */
         this.interval = setInterval(() => {
+            if (this.destroyed)
+                return
             this.log("debug", `LUFS-S: ${lufss.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
             this.sendResponse([ "meter", "LUFS-S", lufss ])
             this.sendResponse([ "meter", "RMS", rms ])
         }, this.params.interval)

-        /* provide Duplex stream and internally attach to …
+        /* provide Duplex stream and internally attach to meter */
         const self = this
         this.stream = new Stream.Transform({
             writableObjectMode: true,
@@ -67,46 +74,78 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode

             /* transform audio chunk */
             transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
                 if (!Buffer.isBuffer(chunk.payload))
                     callback(new Error("expected audio input as Buffer chunks"))
                 else if (chunk.payload.byteLength === 0)
                     callback()
                 else {
-                    …
-                    const …
-                    …
+                    try {
+                        /* convert audio samples from PCM/I16 to PCM/F32 */
+                        const data = utils.convertBufToF32(chunk.payload, self.config.audioLittleEndian)
+
+                        /* update internal audio sample sliding window */
+                        if (data.length >= sampleWindowSize)
+                            /* new data is larger than window, so just use the tail */
+                            sampleWindow = data.slice(data.length - sampleWindowSize)
+                        else {
+                            /* shift existing data and append new data */
+                            const newWindow = new Float32Array(sampleWindowSize)
+                            const keepSize = sampleWindowSize - data.length
+                            newWindow.set(sampleWindow.slice(sampleWindow.length - keepSize), 0)
+                            newWindow.set(data, keepSize)
+                            sampleWindow = newWindow
+                        }
+
+                        /* asynchronously calculate the LUFS-S metric */
+                        const calculator = setTimeout(() => {
+                            if (self.destroyed)
+                                return
+                            try {
+                                self.pendingCalculations.delete(calculator)
+                                const audioData = {
+                                    sampleRate: self.config.audioSampleRate,
+                                    numberOfChannels: self.config.audioChannels,
+                                    channelData: [ sampleWindow ],
+                                    duration: sampleWindowDuration,
+                                    length: sampleWindow.length
+                                } satisfies AudioData
+                                const lufs = getLUFS(audioData, {
+                                    channelMode: self.config.audioChannels === 1 ? "mono" : "stereo",
+                                    calculateShortTerm: true,
+                                    calculateMomentary: false,
+                                    calculateLoudnessRange: false,
+                                    calculateTruePeak: false
+                                })
+                                if (!self.destroyed) {
+                                    lufss = lufs.shortTerm ? lufs.shortTerm[0] : 0
+                                    rms = getRMS(audioData, { asDB: true })
+                                }
+                            }
+                            catch (error) {
+                                if (!self.destroyed)
+                                    self.log("warning", `meter calculation error: ${error}`)
+                            }
+                        }, 0)
+                        self.pendingCalculations.add(calculator)
+
+                        /* pass-through original audio chunk */
+                        this.push(chunk)
+                        callback()
+                    }
+                    catch (error) {
+                        callback(error instanceof Error ? error : new Error("Meter processing failed"))
+                    }
                 }
             },
             final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
                 this.push(null)
                 callback()
             }
@@ -115,16 +154,24 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode

         /* close node */
         async close () {
-            /* …
-            …
+            /* indicate destruction */
+            this.destroyed = true
+
+            /* clear all pending calculations */
+            for (const timeout of this.pendingCalculations)
+                clearTimeout(timeout)
+            this.pendingCalculations.clear()

             /* stop interval */
             if (this.interval !== null) {
                 clearInterval(this.interval)
                 this.interval = null
             }
+
+            /* close stream */
+            if (this.stream !== null) {
+                this.stream.destroy()
+                this.stream = null
+            }
         }
-}
+}
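The meter's rewritten `transform` keeps a fixed three-second sample window: new data either replaces the window wholesale (keeping only its tail) or shifts the old samples left and appends. The window update in isolation, as a sketch (the `slideWindow` name is ours):

```ts
/* Keep the most recent `window.length` samples after appending `data`
   (sketch of the sliding-window update above; function name is ours). */
function slideWindow (window: Float32Array, data: Float32Array): Float32Array {
    if (data.length >= window.length)
        /* new data alone fills the window: keep only its tail */
        return data.slice(data.length - window.length)
    const next = new Float32Array(window.length)
    const keep = window.length - data.length
    next.set(window.slice(window.length - keep), 0)  /* shift old samples left */
    next.set(data, keep)                             /* append the new samples */
    return next
}

/* usage: sampleWindow = slideWindow(sampleWindow, newSamples) */
```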