speechflow 1.6.4 → 1.6.5
- package/CHANGELOG.md +10 -0
- package/README.md +5 -3
- package/etc/speechflow.yaml +15 -13
- package/package.json +5 -5
- package/speechflow-cli/dst/speechflow-main-api.js +3 -7
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +1 -1
- package/speechflow-cli/dst/speechflow-main.js +6 -0
- package/speechflow-cli/dst/speechflow-main.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -21
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -21
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +35 -29
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +50 -34
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +0 -6
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +0 -6
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +0 -6
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +0 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +6 -6
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -2
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-audio.js +21 -0
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-error.js +7 -1
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.js +2 -2
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/package.json +17 -17
- package/speechflow-cli/src/speechflow-main-api.ts +6 -13
- package/speechflow-cli/src/speechflow-main-graph.ts +1 -1
- package/speechflow-cli/src/speechflow-main.ts +4 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -29
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -29
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +44 -39
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +58 -38
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +0 -6
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +0 -6
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +0 -6
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +0 -6
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +10 -14
- package/speechflow-cli/src/speechflow-node-xio-device.ts +3 -2
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +1 -1
- package/speechflow-cli/src/speechflow-util-audio.ts +30 -0
- package/speechflow-cli/src/speechflow-util-error.ts +9 -3
- package/speechflow-cli/src/speechflow-util-stream.ts +2 -2
- package/speechflow-ui-db/dst/index.js +20 -20
- package/speechflow-ui-db/package.json +8 -8
- package/speechflow-ui-db/src/app.vue +14 -5
- package/speechflow-ui-st/dst/index.js +455 -20
- package/speechflow-ui-st/package.json +9 -9
- package/speechflow-ui-st/src/app.vue +8 -3
- package/speechflow-cli/dst/speechflow-util-webaudio-wt.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-util-webaudio-wt.js +0 -124
- package/speechflow-cli/dst/speechflow-util-webaudio-wt.js.map +0 -1
- package/speechflow-cli/dst/speechflow-util-webaudio.d.ts +0 -13
- package/speechflow-cli/dst/speechflow-util-webaudio.js +0 -137
- package/speechflow-cli/dst/speechflow-util-webaudio.js.map +0 -1
@@ -61,34 +61,6 @@ class ExpanderProcessor extends AudioWorkletProcessor {
         return targetOut - levelDB
     }

-    /* update envelope (smoothed amplitude contour) for single channel */
-    private updateEnvelopeForChannel (
-        chan: number,
-        samples: Float32Array,
-        attack: number,
-        release: number
-    ): void {
-        /* fetch old envelope value */
-        if (this.env[chan] === undefined)
-            this.env[chan] = 1e-12
-        let env = this.env[chan]
-
-        /* calculate attack/release alpha values */
-        const alphaA = Math.exp(-1 / (attack * this.sampleRate))
-        const alphaR = Math.exp(-1 / (release * this.sampleRate))
-
-        /* iterate over all samples and calculate RMS */
-        for (const s of samples) {
-            const x = Math.abs(s)
-            const det = x * x
-            if (det > env)
-                env = alphaA * env + (1 - alphaA) * det
-            else
-                env = alphaR * env + (1 - alphaR) * det
-        }
-        this.env[chan] = Math.sqrt(Math.max(env, 1e-12))
-    }
-
     /* process a single sample frame */
     process(
         inputs: Float32Array[][],
@@ -126,7 +98,7 @@

         /* update envelope per channel */
         for (let ch = 0; ch < nCh; ch++)
-            this.updateEnvelopeForChannel(ch, input[ch], attackS, releaseS)
+            this.env[ch] = util.updateEnvelopeForChannel(this.env, this.sampleRate, ch, input[ch], attackS, releaseS)

         /* determine linear value from decibel makeup value */
         const makeUpLin = util.dB2lin(makeupDB)
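Both the compressor and expander worklets receive the same refactoring: the private envelope follower becomes the shared util.updateEnvelopeForChannel (defined in the speechflow-util-audio.ts hunk further below), which returns the updated envelope value instead of mutating processor state. A minimal call-site sketch, assuming a worklet with an env: number[] field and attack/release times in seconds:

    /* sketch: per-channel envelope update inside process() */
    for (let ch = 0; ch < input.length; ch++)
        this.env[ch] = util.updateEnvelopeForChannel(
            this.env, this.sampleRate, ch, input[ch], attackS, releaseS)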
@@ -93,7 +93,7 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
         util.run("starting FFmpeg process", () => this.ffmpeg!.run())

         /* establish a duplex stream and connect it to FFmpeg */
-
+        const ffmpegStream = Stream.Duplex.from({
             writable: streamInput,
             readable: streamOutput
         })
@@ -101,7 +101,7 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
         /* wrap streams with conversions for chunk vs plain audio */
         const wrapper1 = util.createTransformStreamForWritableSide()
         const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
-        this.stream = Stream.compose(wrapper1,
+        this.stream = Stream.compose(wrapper1, ffmpegStream, wrapper2)
     }

     /* close node */
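The fix captures the FFmpeg bridge in a named ffmpegStream duplex and splices it between the two object-mode wrappers in one Stream.compose call. The underlying node:stream pattern, as a sketch (the child-process streams and wrapper transforms are stand-in assumptions):

    import Stream from "node:stream"

    /* sketch: expose a child process as one duplex, then compose the pipeline */
    const duplex = Stream.Duplex.from({ writable: childStdin, readable: childStdout })
    const composed = Stream.compose(writableWrapper, duplex, readableWrapper)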
@@ -5,12 +5,13 @@
 */

 /* standard dependencies */
-import path
-import Stream
+import path from "node:path"
+import Stream from "node:stream"

 /* external dependencies */
-import * as Transformers
-import { WaveFile }
+import * as Transformers from "@huggingface/transformers"
+import { WaveFile } from "wavefile"
+import { getRMS, AudioData } from "audio-inspect"

 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -47,7 +48,10 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {

         /* declare node configuration parameters */
         this.configure({
-            window:
+            window: { type: "number", pos: 0, val: 500 },
+            threshold: { type: "number", pos: 1, val: 0.50 },
+            hysteresis: { type: "number", pos: 2, val: 0.25 },
+            volumeThreshold: { type: "number", pos: 3, val: -45 }
         })

         /* declare node input/output format */
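The three new parameters encode a gated decision rule, applied in the classify hunk further below: a gender is only reported when its score clears threshold and beats the opposite score by hysteresis, and only when the signal exceeds volumeThreshold (in dB). As a pure-function sketch of that rule:

    /* sketch of the decision rule the new parameters configure */
    function decideGender (male: number, female: number,
                           threshold = 0.50, hysteresis = 0.25): string {
        if (male > threshold && male > female + hysteresis)
            return "male"
        else if (female > threshold && female > male + hysteresis)
            return "female"
        return "unknown"
    }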
@@ -64,9 +68,6 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
         /* clear shutdown flag */
         this.shutdown = false

-        /* pass-through logging */
-        const log = this.log.bind(this)
-
         /* the used model */
         const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"

@@ -102,24 +103,17 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
                 device: "auto",
                 progress_callback: progressCallback
             })
-            let timeoutId: ReturnType<typeof setTimeout> | null = null
-            const timeoutPromise = new Promise((resolve, reject) => {
-                timeoutId = setTimeout(() =>
-                    reject(new Error("model initialization timeout")), 30 * 1000)
-            })
             this.classifier = await Promise.race([
-                pipelinePromise,
-
-
-                clearTimeout(timeoutId)
-            }) as Transformers.AudioClassificationPipeline
+                pipelinePromise,
+                util.timeoutPromise(30 * 1000, "model initialization timeout")
+            ]) as Transformers.AudioClassificationPipeline
         }
         catch (error) {
             if (this.progressInterval) {
                 clearInterval(this.progressInterval)
                 this.progressInterval = null
             }
-            throw new Error(`failed to initialize classifier pipeline: ${error}
+            throw new Error(`failed to initialize classifier pipeline: ${error}`, { cause: error })
         }
         if (this.progressInterval) {
             clearInterval(this.progressInterval)
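The hand-rolled setTimeout/clearTimeout plumbing gives way to util.timeoutPromise raced against the real work. One behavioral difference: the losing timer is no longer cleared, so it fires into the already-settled race (Promise.race absorbs the late rejection) and merely keeps the timer alive for its duration. A generic wrapper sketch over the new helper (withTimeout itself is hypothetical):

    /* sketch: bound any promise by a deadline via the shared helper */
    async function withTimeout<T> (work: Promise<T>, ms: number, info: string): Promise<T> {
        return Promise.race([ work, util.timeoutPromise<T>(ms, info) ])
    }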
@@ -128,38 +122,49 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
         if (this.classifier === null)
             throw new Error("failed to instantiate classifier pipeline")

+        /* define sample rate required by model */
+        const sampleRateTarget = 16000
+
         /* classify a single large-enough concatenated audio frame */
         const classify = async (data: Float32Array) => {
             if (this.shutdown || this.classifier === null)
                 throw new Error("classifier shutdown during operation")
-
-
-
-
-
-
-
-
-
-            }
+
+            /* check volume level and return "unknown" if too low
+               in order to avoid a wrong classificaton */
+            const audioData = {
+                sampleRate: sampleRateTarget,
+                numberOfChannels: 1,
+                channelData: [ data ],
+                duration: data.length / sampleRateTarget,
+                length: data.length
+            } satisfies AudioData
+            const rms = getRMS(audioData, { asDB: true })
+            if (rms < this.params.volumeThreshold)
+                return "unknown"
+
+            /* classify audio */
+            const result = await Promise.race([
+                this.classifier(data),
+                util.timeoutPromise(30 * 1000, "classification timeout")
+            ]) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
             const classified = Array.isArray(result) ?
                 result as Transformers.AudioClassificationOutput :
                 [ result ]
-            const c1 = classified.find((c
-            const c2 = classified.find((c
+            const c1 = classified.find((c) => c.label === "male")
+            const c2 = classified.find((c) => c.label === "female")
             const male = c1 ? c1.score : 0.0
             const female = c2 ? c2.score : 0.0
-
+            const threshold = this.params.threshold
+            const hysteresis = this.params.hysteresis
+            if (male > threshold && male > female + hysteresis)
                 return "male"
-            else if (female >
+            else if (female > threshold && female > male + hysteresis)
                 return "female"
             else
                 return "unknown"
         }

-        /* define sample rate required by model */
-        const sampleRateTarget = 16000
-
         /* work off queued audio frames */
         const frameWindowDuration = this.params.window / 1000
         const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
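Gating on RMS before classification keeps near-silence away from the model, which otherwise yields arbitrary gender labels. The audio-inspect usage mirrors the diff: wrap the raw Float32Array in an AudioData record and request the level in dB. Standalone sketch of the gate:

    import { getRMS, AudioData } from "audio-inspect"

    /* sketch: true if a mono 16 kHz frame is loud enough to classify */
    function loudEnough (data: Float32Array, thresholdDB = -45): boolean {
        const audio = {
            sampleRate: 16000,
            numberOfChannels: 1,
            channelData: [ data ],
            duration: data.length / 16000,
            length: data.length
        } satisfies AudioData
        return getRMS(audio, { asDB: true }) >= thresholdDB
    }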
@@ -209,13 +214,13 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
                     pos0++
                 }
                 if (lastGender !== gender && !this.shutdown) {
-                    log("info", `gender now recognized as <${gender}>`)
+                    this.log("info", `gender now recognized as <${gender}>`)
                     lastGender = gender
                 }
             }
         }
         catch (error) {
-            log("error", `gender classification error: ${error}`)
+            this.log("error", `gender classification error: ${error}`)
         }

         /* re-initiate working off round */
@@ -307,7 +312,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
                     && element.gender === undefined)
                     break
                 const duration = util.audioArrayDuration(element.data)
-                log("debug", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
+                self.log("debug", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
                 element.chunk.meta.set("gender", element.gender)
                 this.push(element.chunk)
                 self.queueSend.walk(+1)
@@ -22,7 +22,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
     /* internal state */
     private emitInterval: ReturnType<typeof setInterval> | null = null
     private calcInterval: ReturnType<typeof setInterval> | null = null
-    private silenceTimer: ReturnType<typeof setTimeout>
+    private silenceTimer: ReturnType<typeof setTimeout> | null = null
     private chunkBuffer = new Float32Array(0)
     private destroyed = false

@@ -32,7 +32,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {

         /* declare node configuration parameters */
         this.configure({
-            interval: { type: "number", pos: 0, val:
+            interval: { type: "number", pos: 0, val: 100 },
             mode: { type: "string", pos: 1, val: "filter", match: /^(?:filter|sink)$/ },
             dashboard: { type: "string", val: "" }
         })
@@ -55,71 +55,91 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
         this.destroyed = false

         /* internal state */
-
+        let lufsm = -60
+        let rms = -60
+
+        /* chunk processing state for LUFS-M */
+        const sampleWindowDuration = 0.4 /* LUFS-M requires 400ms */
         const sampleWindowSize = Math.floor(this.config.audioSampleRate * sampleWindowDuration)
         const sampleWindow = new Float32Array(sampleWindowSize)
         sampleWindow.fill(0, 0, sampleWindowSize)
-        let lufss = -60
-        let rms = -60

-        /* chunk processing state */
+        /* chunk processing state for RMS */
         const chunkDuration = 0.050 /* meter update frequency is about 50ms */
         const samplesPerChunk = Math.floor(this.config.audioSampleRate * chunkDuration)
         this.chunkBuffer = new Float32Array(0)

-        /*
-
-        /*
-
-
+        /* setup chunking interval */
+        this.calcInterval = setInterval(() => {
+            /* short-circuit during destruction */
+            if (this.destroyed)
+                return

-            /*
-
+            /* short-circuit if still not enough chunk data */
+            if (this.chunkBuffer.length < samplesPerChunk)
+                return
+
+            /* grab the accumulated chunk data */
+            const chunkData = this.chunkBuffer
+            this.chunkBuffer = new Float32Array(0)
+
+            /* update internal audio sample sliding window for LUFS-S */
+            if (chunkData.length > sampleWindow.length)
+                sampleWindow.set(chunkData.subarray(chunkData.length - sampleWindow.length), 0)
+            else {
+                sampleWindow.set(sampleWindow.subarray(chunkData.length), 0)
+                sampleWindow.set(chunkData, sampleWindow.length - chunkData.length)
+            }
+
+            /* calculate the LUFS-M metric */
+            const audioDataLUFS = {
                 sampleRate: this.config.audioSampleRate,
                 numberOfChannels: this.config.audioChannels,
                 channelData: [ sampleWindow ],
                 duration: sampleWindowDuration,
                 length: sampleWindow.length
             } satisfies AudioData
-            const lufs = getLUFS(
+            const lufs = getLUFS(audioDataLUFS, {
                 channelMode: this.config.audioChannels === 1 ? "mono" : "stereo",
-                calculateShortTerm:
-                calculateMomentary:
+                calculateShortTerm: false,
+                calculateMomentary: true,
                 calculateLoudnessRange: false,
                 calculateTruePeak: false
             })
-
-
+            lufsm = lufs.momentary ? Math.max(-60, lufs.momentary[0]) : -60
+
+            /* calculate the RMS metric */
+            const totalSamples = chunkData.length / this.config.audioChannels
+            const duration = totalSamples / this.config.audioSampleRate
+            const audioDataRMS = {
+                sampleRate: this.config.audioSampleRate,
+                numberOfChannels: this.config.audioChannels,
+                channelData: [ chunkData ],
+                duration,
+                length: chunkData.length
+            } satisfies AudioData
+            rms = Math.max(-60, getRMS(audioDataRMS, {
+                asDB: true
+            }))
+
+            /* automatically clear measurement (in case no new measurements happen) */
             if (this.silenceTimer !== null)
                 clearTimeout(this.silenceTimer)
             this.silenceTimer = setTimeout(() => {
-
+                lufsm = -60
                 rms = -60
             }, 500)
-        }
-
-        /* setup chunking interval */
-        this.calcInterval = setInterval(() => {
-            if (this.destroyed)
-                return
-
-            /* process one single 50ms chunk if available */
-            if (this.chunkBuffer.length >= samplesPerChunk) {
-                const chunkData = this.chunkBuffer.slice(0, samplesPerChunk)
-                this.chunkBuffer = this.chunkBuffer.slice(samplesPerChunk)
-                processChunk(chunkData)
-            }
         }, chunkDuration * 1000)

         /* setup loudness emitting interval */
         this.emitInterval = setInterval(() => {
             if (this.destroyed)
                 return
-            this.log("debug", `LUFS-
-            this.sendResponse([ "meter", "LUFS-
+            this.log("debug", `LUFS-M: ${lufsm.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
+            this.sendResponse([ "meter", "LUFS-M", lufsm ])
             this.sendResponse([ "meter", "RMS", rms ])
             if (this.params.dashboard !== "")
-                this.sendDashboard("audio", this.params.dashboard, "final",
+                this.sendDashboard("audio", this.params.dashboard, "final", lufsm)
         }, this.params.interval)

         /* provide Duplex stream and internally attach to meter */
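The rewritten calculation interval now consumes the whole accumulated buffer per tick instead of one fixed 50 ms slice, and maintains a 400 ms sliding window for LUFS-M: large chunks overwrite the window with their tail, small chunks shift the window left and land at its end. Traced on a toy window:

    /* sketch: the window-shift logic from the diff, on toy data */
    const win = new Float32Array([ 1, 2, 3, 4 ])
    const chunk = new Float32Array([ 5, 6 ])
    if (chunk.length > win.length)
        win.set(chunk.subarray(chunk.length - win.length), 0)
    else {
        win.set(win.subarray(chunk.length), 0)     /* win = [ 3, 4, 3, 4 ] */
        win.set(chunk, win.length - chunk.length)  /* win = [ 3, 4, 5, 6 ] */
    }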
@@ -175,6 +195,9 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {

     /* close node */
     async close () {
+        /* indicate destruction immediately to stop any ongoing operations */
+        this.destroyed = true
+
         /* stop intervals */
         if (this.emitInterval !== null) {
             clearInterval(this.emitInterval)
@@ -194,8 +217,5 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
             this.stream.destroy()
             this.stream = null
         }
-
-        /* indicate destruction */
-        this.destroyed = true
     }
 }
@@ -44,6 +44,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
         this.worker = new Worker(resolve(__dirname, "speechflow-node-a2a-rnnoise-wt.js"))
         this.worker.on("error", (err) => {
             this.log("error", `RNNoise worker thread error: ${err}`)
+            this.stream?.emit("error", err)
         })
         this.worker.on("exit", (code) => {
             if (code !== 0)
@@ -158,14 +158,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                     }
                 }
                 catch (error) {
-                    this.log("error", `VAD frame processing error: ${error}
+                    this.log("error", `VAD frame processing error: ${error}`, { cause: error })
                 }
             }
         })
         this.vad.start()
     }
     catch (error) {
-        throw new Error(`failed to initialize VAD: ${error}
+        throw new Error(`failed to initialize VAD: ${error}`, { cause: error })
     }

     /* provide Duplex stream and internally attach to VAD */
@@ -23,7 +23,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
     public static name = "a2t-openai"

     /* internal state */
-    private static speexInitialized = false
     private openai: OpenAI | null = null
     private ws: ws.WebSocket | null = null
     private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
@@ -71,11 +70,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {

         /* establish resampler from our standard audio sample rate (48Khz)
            to OpenAI's maximum 24Khz input sample rate */
-        if (!SpeechFlowNodeA2TOpenAI.speexInitialized) {
-            /* at least once initialize resampler */
-            await SpeexResampler.initPromise
-            SpeechFlowNodeA2TOpenAI.speexInitialized = true
-        }
         this.resampler = new SpeexResampler(1, this.config.audioSampleRate, 24000, 7)

         /* instantiate OpenAI API */
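The same guard removal repeats in the Amazon, ElevenLabs, and Kokoro hunks below: every node previously maintained its own static speexInitialized flag around SpeexResampler.initPromise. Presumably the WASM initialization now happens once at process startup instead (which would match the four lines added to speechflow-main.ts, not shown in this diff), roughly:

    /* sketch (assumption): one-time Speex WASM init at startup */
    await SpeexResampler.initPromise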
@@ -26,7 +26,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {

     /* internal state */
     private client: PollyClient | null = null
-    private static speexInitialized = false
     private destroyed = false
     private resampler: SpeexResampler | null = null

@@ -114,11 +113,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {

         /* establish resampler from AWS Polly's maximum 16Khz output
            (for PCM output) to our standard audio sample rate (48KHz) */
-        if (!SpeechFlowNodeT2AAmazon.speexInitialized) {
-            /* at least once initialize resampler */
-            await SpeexResampler.initPromise
-            SpeechFlowNodeT2AAmazon.speexInitialized = true
-        }
         this.resampler = new SpeexResampler(1, 16000, this.config.audioSampleRate, 7)

         /* create transform stream and connect it to the AWS Polly API */
@@ -22,7 +22,6 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {

     /* internal state */
     private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
-    private static speexInitialized = false
     private destroyed = false
     private resampler: SpeexResampler | null = null

@@ -131,11 +130,6 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {

         /* establish resampler from ElevenLabs's maximum 24Khz
            output to our standard audio sample rate (48KHz) */
-        if (!SpeechFlowNodeT2AElevenlabs.speexInitialized) {
-            /* at least once initialize resampler */
-            await SpeexResampler.initPromise
-            SpeechFlowNodeT2AElevenlabs.speexInitialized = true
-        }
         this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)

         /* create transform stream and connect it to the ElevenLabs API */
@@ -23,7 +23,6 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
     /* internal state */
     private kokoro: KokoroTTS | null = null
     private resampler: SpeexResampler | null = null
-    private static speexInitialized = false

     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -82,11 +81,6 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {

         /* establish resampler from Kokoro's maximum 24Khz
            output to our standard audio sample rate (48KHz) */
-        if (!SpeechFlowNodeT2AKokoro.speexInitialized) {
-            /* at least once initialize resampler */
-            SpeechFlowNodeT2AKokoro.speexInitialized = true
-            await SpeexResampler.initPromise
-        }
         this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)

         /* determine voice for text-to-speech operation */
@@ -177,7 +177,7 @@ export default class SpeechFlowNodeT2TOllama extends SpeechFlowNode {
             models = await this.ollama.list()
         }
         catch (err) {
-            throw new Error(`failed to connect to Ollama API at ${this.params.api}: ${err}
+            throw new Error(`failed to connect to Ollama API at ${this.params.api}: ${err}`, { cause: err })
         }
         const exists = models.models.some((m) => m.name === this.params.model)
         if (!exists) {
@@ -20,13 +20,10 @@ import HAPIWebSocket from "hapi-plugin-websocket"
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 import * as util from "./speechflow-util"

-type
-
-
-
-    ctx: wsPeerCtx
-    ws: WebSocket
-    req: http.IncomingMessage
+type WSPeerInfo = {
+    ctx: Record<string, any>
+    ws: WebSocket
+    req: http.IncomingMessage
 }

 /* SpeechFlow node for subtitle (text-to-text) "translations" */
@@ -160,7 +157,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
         }
         else if (this.params.mode === "render") {
             /* establish REST/WebSocket API */
-            const wsPeers = new Map<string,
+            const wsPeers = new Map<string, WSPeerInfo>()
             this.hapi = new HAPI.Server({
                 address: this.params.addr,
                 port: this.params.port
@@ -205,19 +202,18 @@
                     plugins: {
                         websocket: {
                             autoping: 30 * 1000,
-                            connect: (
-                                const ctx: wsPeerCtx = args.ctx
-                                const ws: WebSocket = args.ws
-                                const req: http.IncomingMessage = args.req
+                            connect: ({ ctx, ws, req }) => {
                                 const peer = `${req.socket.remoteAddress}:${req.socket.remotePort}`
                                 ctx.peer = peer
                                 wsPeers.set(peer, { ctx, ws, req })
                                 this.log("info", `HAPI: WebSocket: connect: peer ${peer}`)
                             },
-                            disconnect: (
-                                const ctx: wsPeerCtx = args.ctx
+                            disconnect: ({ ctx, ws }) => {
                                 const peer = ctx.peer
                                 wsPeers.delete(peer)
+                                ws.removeAllListeners()
+                                if (ws.readyState === WebSocket.OPEN)
+                                    ws.close()
                                 this.log("info", `HAPI: WebSocket: disconnect: peer ${peer}`)
                             }
                         }
@@ -115,7 +115,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {

             /* convert regular stream into object-mode stream */
             const wrapper1 = util.createTransformStreamForWritableSide()
-            const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
+            const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
             this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
         }

@@ -136,7 +136,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
             this.stream = this.io as unknown as Stream.Readable

             /* convert regular stream into object-mode stream */
-            const wrapper = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
+            const wrapper = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
             this.stream = Stream.compose(this.stream, wrapper)
         }

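Both device paths now thread a highwaterMark through to the readable-side wrapper (the speechflow-util-stream signature gained a parameter, per the .d.ts change in the file list). In Node terms this presumably lands in the Transform's highWaterMark option, which caps how many object-mode chunks buffer before back-pressure applies. A sketch of such a wrapper (the real helper's internals are not shown):

    import Stream from "node:stream"

    /* sketch: object-mode pass-through with an explicit high-water mark */
    const wrapper = new Stream.Transform({
        readableObjectMode: true,
        writableObjectMode: true,
        highWaterMark: 64,  /* buffer at most 64 chunks */
        transform (chunk, _encoding, callback) { callback(null, chunk) }
    })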
@@ -193,6 +193,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
         /* pass-through PortAudio errors */
         this.io!.on("error", (err) => {
             this.emit("error", err)
+            this.stream?.emit("error", err)
         })

         /* start PortAudio */
@@ -132,6 +132,36 @@ export async function processInt16ArrayInSegments (
     return data
 }

+/* update envelope (smoothed amplitude contour) for single channel */
+export function updateEnvelopeForChannel(
+    env: number[],
+    sampleRate: number,
+    chan: number,
+    samples: Float32Array,
+    attack: number,
+    release: number
+): number {
+    /* fetch old envelope value */
+    if (env[chan] === undefined)
+        env[chan] = 1e-12
+    let currentEnv = env[chan]
+
+    /* calculate attack/release alpha values */
+    const alphaA = Math.exp(-1 / (attack * sampleRate))
+    const alphaR = Math.exp(-1 / (release * sampleRate))
+
+    /* iterate over all samples and calculate RMS */
+    for (const s of samples) {
+        const x = Math.abs(s)
+        const det = x * x
+        if (det > currentEnv)
+            currentEnv = alphaA * currentEnv + (1 - alphaA) * det
+        else
+            currentEnv = alphaR * currentEnv + (1 - alphaR) * det
+    }
+    return Math.sqrt(Math.max(currentEnv, 1e-12))
+}
+
 /* helper functions for linear/decibel conversions */
 export function lin2dB (x: number): number {
     return 20 * Math.log10(Math.max(x, 1e-12))
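The alpha coefficients are one-pole smoothing factors derived from the attack/release time constants: each squared sample pulls the envelope toward itself by a factor of (1 - alpha). Worked numbers, assuming a 48 kHz sample rate:

    const alphaA = Math.exp(-1 / (0.005 * 48000))  /* 5 ms attack    -> ~0.99584 */
    const alphaR = Math.exp(-1 / (0.100 * 48000))  /* 100 ms release -> ~0.99979 */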
@@ -5,8 +5,8 @@
 */

 /* helper function for promise-based timeout */
-export function timeoutPromise (duration: number = 10 * 1000, info = "timeout") {
-    return new Promise<
+export function timeoutPromise<T = void> (duration: number = 10 * 1000, info = "timeout") {
+    return new Promise<T>((resolve, reject) => {
         setTimeout(() => { reject(new Error(info)) }, duration)
     })
 }
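Making timeoutPromise generic lets Promise.race preserve the winning promise's type instead of widening to unknown. Usage sketch (fetchTranscript is hypothetical):

    const text = await Promise.race([
        fetchTranscript(),                                    /* Promise<string> */
        util.timeoutPromise<string>(10 * 1000, "API timeout")
    ])  /* text: string */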
@@ -21,7 +21,13 @@ export function ensureError (error: unknown, prefix?: string, debug = false): Error {
         msg = `${prefix}: ${msg}`
     if (debug && error instanceof Error)
         msg = `${msg}\n${error.stack}`
-
+    if (error instanceof Error) {
+        const err = new Error(msg, { cause: error })
+        err.stack = error.stack
+        return err
+    }
+    else
+        return new Error(msg)
 }

 /* helper function for retrieving a Promise object */
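ensureError now chains the original error as cause while grafting the original stack onto the wrapper, so both the prefixed message and the original trace survive. Usage sketch:

    /* sketch: normalize an unknown catch value with a cause chain */
    try { JSON.parse("not json") }
    catch (error) {
        throw util.ensureError(error, "failed to parse configuration")
        /* message: "failed to parse configuration: ...", cause: SyntaxError */
    }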