speechflow 1.3.1 → 1.3.2
- package/CHANGELOG.md +6 -0
- package/dst/speechflow-node-a2a-gender.d.ts +2 -0
- package/dst/speechflow-node-a2a-gender.js +137 -59
- package/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/dst/speechflow-node-a2a-meter.d.ts +3 -1
- package/dst/speechflow-node-a2a-meter.js +80 -39
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2a-mute.d.ts +1 -0
- package/dst/speechflow-node-a2a-mute.js +37 -11
- package/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/dst/speechflow-node-a2a-vad.d.ts +3 -0
- package/dst/speechflow-node-a2a-vad.js +194 -96
- package/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/dst/speechflow-node-a2a-wav.js +27 -11
- package/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +4 -0
- package/dst/speechflow-node-a2t-deepgram.js +136 -46
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +2 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +61 -12
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +1 -0
- package/dst/speechflow-node-t2a-kokoro.js +10 -4
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/dst/speechflow-node-t2t-deepl.js +8 -4
- package/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/dst/speechflow-node-t2t-format.js +2 -2
- package/dst/speechflow-node-t2t-format.js.map +1 -1
- package/dst/speechflow-node-t2t-ollama.js +1 -1
- package/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/dst/speechflow-node-t2t-openai.js +1 -1
- package/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +1 -1
- package/dst/speechflow-node-t2t-sentence.js +34 -18
- package/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/dst/speechflow-node-t2t-subtitle.d.ts +0 -1
- package/dst/speechflow-node-t2t-subtitle.js +78 -190
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-t2t-transformers.js +2 -2
- package/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/dst/speechflow-node-x2x-filter.js +4 -4
- package/dst/speechflow-node-x2x-trace.js +6 -13
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +12 -8
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node-xio-file.js +9 -3
- package/dst/speechflow-node-xio-file.js.map +1 -1
- package/dst/speechflow-node-xio-mqtt.js +5 -2
- package/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/dst/speechflow-node-xio-websocket.js +11 -11
- package/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/dst/speechflow-node.d.ts +0 -2
- package/dst/speechflow-node.js +0 -3
- package/dst/speechflow-node.js.map +1 -1
- package/dst/speechflow-utils.d.ts +5 -0
- package/dst/speechflow-utils.js +77 -44
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +101 -82
- package/dst/speechflow.js.map +1 -1
- package/etc/eslint.mjs +1 -2
- package/etc/stx.conf +3 -3
- package/package.json +6 -6
- package/src/speechflow-node-a2a-gender.ts +148 -64
- package/src/speechflow-node-a2a-meter.ts +87 -40
- package/src/speechflow-node-a2a-mute.ts +39 -11
- package/src/speechflow-node-a2a-vad.ts +206 -100
- package/src/speechflow-node-a2a-wav.ts +27 -11
- package/src/speechflow-node-a2t-deepgram.ts +139 -43
- package/src/speechflow-node-t2a-elevenlabs.ts +65 -12
- package/src/speechflow-node-t2a-kokoro.ts +11 -4
- package/src/speechflow-node-t2t-deepl.ts +9 -4
- package/src/speechflow-node-t2t-format.ts +2 -2
- package/src/speechflow-node-t2t-ollama.ts +1 -1
- package/src/speechflow-node-t2t-openai.ts +1 -1
- package/src/speechflow-node-t2t-sentence.ts +37 -20
- package/src/speechflow-node-t2t-transformers.ts +4 -3
- package/src/speechflow-node-x2x-filter.ts +4 -4
- package/src/speechflow-node-x2x-trace.ts +1 -1
- package/src/speechflow-node-xio-device.ts +12 -8
- package/src/speechflow-node-xio-file.ts +9 -3
- package/src/speechflow-node-xio-mqtt.ts +5 -2
- package/src/speechflow-node-xio-websocket.ts +12 -12
- package/src/speechflow-utils.ts +78 -44
- package/src/speechflow.ts +114 -35
--- package/src/speechflow-node-a2a-vad.ts (1.3.1)
+++ package/src/speechflow-node-a2a-vad.ts (1.3.2)

@@ -40,6 +40,9 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
     private queueRecv = this.queue.pointerUse("recv")
     private queueVAD = this.queue.pointerUse("vad")
     private queueSend = this.queue.pointerUse("send")
+    private destroyed = false
+    private tailTimer: ReturnType<typeof setTimeout> | null = null
+    private activeEventListeners = new Set<() => void>()

     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
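The three new private fields establish the teardown-guard pattern that recurs throughout this release: asynchronous callbacks first check a destroyed flag, the tail timer handle is stored so it can be cleared, and one-shot event listeners are tracked so close() can remove them. A minimal, self-contained sketch of the pattern (the member names mirror the diff, but the EventEmitter-based queue and the helper methods are stand-ins, not SpeechFlow's actual API):

    import { EventEmitter } from "node:events"

    /* sketch: guard async callbacks, timers and listeners against use-after-close */
    class GuardedNode {
        private destroyed = false
        private tailTimer: ReturnType<typeof setTimeout> | null = null
        private activeEventListeners = new Set<() => void>()
        constructor (private queue: EventEmitter) {}

        awaitWrite (handler: () => void) {
            if (this.destroyed)
                return
            this.queue.once("write", handler)
            this.activeEventListeners.add(handler)  /* remember for cleanup */
        }

        armTailTimer (ms: number, onElapsed: () => void) {
            if (this.tailTimer !== null)
                clearTimeout(this.tailTimer)        /* re-arm, never stack timers */
            this.tailTimer = setTimeout(() => {
                this.tailTimer = null
                if (!this.destroyed)
                    onElapsed()                     /* fire only while alive */
            }, ms)
        }

        close () {
            this.destroyed = true                   /* silence late callbacks */
            if (this.tailTimer !== null) {
                clearTimeout(this.tailTimer)
                this.tailTimer = null
            }
            for (const listener of this.activeEventListeners)
                this.queue.removeListener("write", listener)
            this.activeEventListeners.clear()
        }
    }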
@@ -67,8 +70,8 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
             throw new Error("VAD node currently supports PCM-S16LE audio only")

-        /*
-        … (deleted line not rendered)
+        /* clear destruction flag */
+        this.destroyed = false

         /* internal processing constants */
         const vadSampleRateTarget = 16000 /* internal target of VAD */
@@ -76,75 +79,101 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {

         /* establish Voice Activity Detection (VAD) facility */
         let tail = false
-        … (14 deleted lines not rendered)
-                    if (tailTimer !== null) {
-                        clearTimeout(tailTimer)
-                        tailTimer = null
-                    }
-                }
-            },
-            onSpeechEnd: (audio) => {
-                const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
-                log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
-                if (this.params.mode === "unlugged") {
-                    tail = true
-                    if (tailTimer !== null)
-                        clearTimeout(tailTimer)
-                    tailTimer = setTimeout(() => {
+        try {
+            this.vad = await RealTimeVAD.new({
+                model: "v5",
+                sampleRate: this.config.audioSampleRate, /* before resampling to 16KHz */
+                frameSamples: vadSamplesPerFrame, /* after resampling to 16KHz */
+                positiveSpeechThreshold: this.params.posSpeechThreshold,
+                negativeSpeechThreshold: this.params.negSpeechThreshold,
+                minSpeechFrames: this.params.minSpeechFrames,
+                redemptionFrames: this.params.redemptionFrames,
+                preSpeechPadFrames: this.params.preSpeechPadFrames,
+                onSpeechStart: () => {
+                    if (this.destroyed)
+                        return
+                    this.log("info", "VAD: speech start")
+                    if (this.params.mode === "unplugged") {
                         tail = false
-                        tailTimer = null
-        … (15 deleted lines not rendered)
-            onFrameProcessed: (audio) => {
-                /* annotate the current audio segment */
-                const element = this.queueVAD.peek()
-                if (element === undefined || element.type !== "audio-frame")
-                    throw new Error("internal error which cannot happen: no more queued element")
-                const segment = element.segmentData[element.segmentIdx++]
-                segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
-
-                /* annotate the entire audio chunk */
-                if (element.segmentIdx >= element.segmentData.length) {
-                    let isSpeech = false
-                    for (const segment of element.segmentData) {
-                        if (segment.isSpeech) {
-                            isSpeech = true
-                            break
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
+                        }
+                    }
+                },
+                onSpeechEnd: (audio) => {
+                    if (this.destroyed)
+                        return
+                    const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
+                    this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
+                    if (this.params.mode === "unplugged") {
+                        tail = true
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
                         }
+                        this.tailTimer = setTimeout(() => {
+                            if (this.destroyed || this.tailTimer === null)
+                                return
+                            tail = false
+                            this.tailTimer = null
+                        }, this.params.postSpeechTail)
+                    }
+                },
+                onVADMisfire: () => {
+                    if (this.destroyed) return
+                    this.log("info", "VAD: speech end (segment too short)")
+                    if (this.params.mode === "unplugged") {
+                        tail = true
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
+                        }
+                        this.tailTimer = setTimeout(() => {
+                            if (this.destroyed || this.tailTimer === null)
+                                return
+                            tail = false
+                            this.tailTimer = null
+                        }, this.params.postSpeechTail)
+                    }
+                },
+                onFrameProcessed: (audio) => {
+                    if (this.destroyed)
+                        return
+                    try {
+                        /* annotate the current audio segment */
+                        const element = this.queueVAD.peek()
+                        if (element === undefined || element.type !== "audio-frame")
+                            throw new Error("internal error which cannot happen: no more queued element")
+                        if (element.segmentIdx >= element.segmentData.length)
+                            throw new Error("segment index out of bounds")
+                        const segment = element.segmentData[element.segmentIdx++]
+                        segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
+
+                        /* annotate the entire audio chunk */
+                        if (element.segmentIdx >= element.segmentData.length) {
+                            let isSpeech = false
+                            for (const segment of element.segmentData) {
+                                if (segment.isSpeech) {
+                                    isSpeech = true
+                                    break
+                                }
+                            }
+                            element.isSpeech = isSpeech
+                            this.queueVAD.touch()
+                            this.queueVAD.walk(+1)
+                        }
+                    }
+                    catch (error) {
+                        this.log("error", `VAD frame processing error: ${error}`)
                     }
-                element.isSpeech = isSpeech
-                this.queueVAD.touch()
-                this.queueVAD.walk(+1)
                 }
-        }
-        … (2 deleted lines not rendered)
+            })
+            this.vad.start()
+        }
+        catch (error) {
+            throw new Error(`failed to initialize VAD: ${error}`)
+        }

         /* provide Duplex stream and internally attach to VAD */
         const self = this
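In unplugged mode the node keeps marking audio as speech for a short hang-over (the postSpeechTail) after each speech end, so trailing syllables are not clipped; onSpeechStart cancels the hang-over because real speech has resumed. The tail/tailTimer pair in isolation (a standalone sketch with hypothetical names):

    /* sketch: post-speech "tail" hang-over — after speech ends, keep
       treating audio as speech for tailMs, then stop */
    class SpeechTail {
        private tail = false
        private timer: ReturnType<typeof setTimeout> | null = null
        constructor (private tailMs: number) {}

        onSpeechEnd () {
            this.tail = true
            if (this.timer !== null)
                clearTimeout(this.timer)  /* re-arm on every speech end */
            this.timer = setTimeout(() => {
                this.tail = false         /* hang-over elapsed */
                this.timer = null
            }, this.tailMs)
        }

        onSpeechStart () {
            this.tail = false             /* real speech resumed, no tail needed */
            if (this.timer !== null) {
                clearTimeout(this.timer)
                this.timer = null
            }
        }

        isSpeech (vadSaysSpeech: boolean): boolean {
            return vadSaysSpeech || this.tail
        }
    }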
@@ -156,47 +185,70 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {

             /* receive audio chunk (writable side of stream) */
             write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
                 if (!Buffer.isBuffer(chunk.payload))
                     callback(new Error("expected audio input as Buffer chunks"))
                 else if (chunk.payload.byteLength === 0)
                     callback()
                 else {
-                    … (4 deleted lines not rendered)
-                    const segmentData: AudioQueueElementSegment[] = []
-                    const chunkSize = vadSamplesPerFrame * (self.config.audioSampleRate / vadSampleRateTarget)
-                    const chunks = Math.trunc(data.length / chunkSize)
-                    for (let i = 0; i < chunks; i++) {
-                        const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
-                        const segment: AudioQueueElementSegment = { data: frame }
-                        segmentData.push(segment)
-                    }
-                    if ((chunks * chunkSize) < data.length) {
-                        const frame = new Float32Array(chunkSize)
-                        frame.fill(0)
-                        frame.set(data.slice(chunks * chunkSize, data.length))
-                        const segment: AudioQueueElementSegment = { data: frame }
-                        segmentData.push(segment)
-                    }
+                    try {
+                        /* convert audio samples from PCM/I16 to PCM/F32 */
+                        const data = utils.convertBufToF32(chunk.payload,
+                            self.config.audioLittleEndian)

-                    … (5 deleted lines not rendered)
+                        /* segment audio samples as individual VAD-sized frames */
+                        const segmentData: AudioQueueElementSegment[] = []
+                        const chunkSize = vadSamplesPerFrame *
+                            (self.config.audioSampleRate / vadSampleRateTarget)
+                        const chunks = Math.trunc(data.length / chunkSize)
+                        for (let i = 0; i < chunks; i++) {
+                            const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
+                            const segment: AudioQueueElementSegment = { data: frame }
+                            segmentData.push(segment)
+                        }
+                        if ((chunks * chunkSize) < data.length) {
+                            const frame = new Float32Array(chunkSize)
+                            frame.fill(0)
+                            frame.set(data.slice(chunks * chunkSize, data.length))
+                            const segment: AudioQueueElementSegment = { data: frame }
+                            segmentData.push(segment)
+                        }

-                    … (3 deleted lines not rendered)
+                        /* queue the results */
+                        self.queueRecv.append({
+                            type: "audio-frame", chunk,
+                            segmentIdx: 0, segmentData
+                        })

-                    … (deleted line not rendered)
+                        /* push segments through Voice Activity Detection (VAD) */
+                        if (self.vad && !self.destroyed) {
+                            try {
+                                for (const segment of segmentData)
+                                    self.vad.processAudio(segment.data)
+                            }
+                            catch (error) {
+                                self.log("error", `VAD processAudio error: ${error}`)
+                            }
+                        }
+
+                        callback()
+                    }
+                    catch (error) {
+                        callback(error instanceof Error ? error : new Error("VAD processing failed"))
+                    }
                 }
             },

             /* receive no more audio chunks (writable side of stream) */
             final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+
                 /* signal end of file */
                 self.queueRecv.append({ type: "audio-eof" })
                 callback()
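The rewritten write path still performs the same three steps, now inside a try/catch: convert PCM-S16LE to Float32, cut the samples into VAD-sized frames (zero-padding the last partial frame), then queue the frames and feed them to the VAD. The frame-cutting step in isolation (standalone sketch, hypothetical function name):

    /* sketch: split samples into fixed-size frames, zero-padding the
       last frame when the input is not an exact multiple */
    function segmentFrames (data: Float32Array, frameSize: number): Float32Array[] {
        const frames: Float32Array[] = []
        const whole = Math.trunc(data.length / frameSize)
        for (let i = 0; i < whole; i++)
            frames.push(data.slice(i * frameSize, (i + 1) * frameSize))
        if (whole * frameSize < data.length) {
            const last = new Float32Array(frameSize) /* zero-filled by default */
            last.set(data.slice(whole * frameSize))
            frames.push(last)
        }
        return frames
    }

For example, 1000 samples with frameSize 480 yield three frames: two full frames of 480 samples and a final frame of 40 samples followed by 440 zeros.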
@@ -204,12 +256,26 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {

             /* send audio chunk(s) (readable side of stream) */
             read (_size) {
+                if (self.destroyed) {
+                    this.push(null)
+                    return
+                }
+
                 /* try to perform read operation from scratch */
                 const tryToRead = () => {
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+
                     /* flush pending audio chunks */
                     const flushPendingChunks = () => {
                         let pushed = 0
                         while (true) {
+                            if (self.destroyed) {
+                                this.push(null)
+                                return
+                            }
                             const element = self.queueSend.peek()
                             if (element === undefined)
                                 break
@@ -233,23 +299,33 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                                 this.push(chunk)
                                 pushed++
                             }
-                            else if (self.params.mode === "unplugged" && pushed === 0)
+                            else if (self.params.mode === "unplugged" && pushed === 0) {
                                 /* we have to await chunks now, as in unplugged
                                    mode we else would be never called again until
                                    we at least once push a new chunk as the result */
-
+                                setTimeout(() => {
+                                    if (self.destroyed)
+                                        return
+                                    tryToRead()
+                                }, 0)
+                                return
+                            }
                         }
                     }

                     /* await forthcoming audio chunks */
                     const awaitForthcomingChunks = () => {
+                        if (self.destroyed)
+                            return
                         const element = self.queueSend.peek()
                         if (element !== undefined
                             && element.type === "audio-frame"
                             && element.isSpeech !== undefined)
                             flushPendingChunks()
-                        else
+                        else if (!self.destroyed) {
                             self.queue.once("write", awaitForthcomingChunks)
+                            self.activeEventListeners.add(awaitForthcomingChunks)
+                        }
                     }

                     const element = self.queueSend.peek()
@@ -259,8 +335,10 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                         && element.type === "audio-frame"
                         && element.isSpeech !== undefined)
                         flushPendingChunks()
-                    else
+                    else if (!self.destroyed) {
                         self.queue.once("write", awaitForthcomingChunks)
+                        self.activeEventListeners.add(awaitForthcomingChunks)
+                    }
                 }
                 tryToRead()
             }
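When unplugged mode suppresses every pending chunk, the readable side now re-polls on the next macrotask instead of parking forever; combined with the tracked "write" listeners above, nothing outlives close(). The deferral pattern in isolation (a hypothetical helper, not SpeechFlow API):

    /* hypothetical helper: retry on the next macrotask so the current
       read() invocation can unwind instead of recursing synchronously */
    function schedulePoll (isDestroyed: () => boolean, poll: () => void): void {
        setTimeout(() => {
            if (isDestroyed())
                return /* the node was closed while the retry was pending */
            poll()
        }, 0)
    }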
@@ -269,15 +347,43 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {

     /* close node */
     async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* cleanup tail timer */
+        if (this.tailTimer !== null) {
+            clearTimeout(this.tailTimer)
+            this.tailTimer = null
+        }
+
+        /* remove all event listeners */
+        this.activeEventListeners.forEach((listener) => {
+            this.queue.removeListener("write", listener)
+        })
+        this.activeEventListeners.clear()
+
         /* close stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null
         }

+        /* cleanup queue pointers before closing VAD to prevent callback access */
+        this.queue.pointerDelete("recv")
+        this.queue.pointerDelete("vad")
+        this.queue.pointerDelete("send")
+
         /* close VAD */
         if (this.vad !== null) {
-
+            try {
+                const flushPromise = this.vad.flush()
+                const timeoutPromise = new Promise((resolve) =>
+                    setTimeout(resolve, 5000))
+                await Promise.race([ flushPromise, timeoutPromise ])
+            }
+            catch (error) {
+                this.log("warning", `VAD flush error during close: ${error}`)
+            }
             this.vad.destroy()
             this.vad = null
         }
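close() now bounds the final VAD flush with a five-second timeout so a hung flush cannot stall shutdown. The same pattern in isolation (a minimal sketch; flush here stands for any async cleanup step):

    /* minimal sketch: bound an async cleanup step with Promise.race */
    async function flushWithTimeout (flush: () => Promise<void>, timeoutMs = 5000): Promise<void> {
        try {
            const timeout = new Promise<void>((resolve) =>
                setTimeout(resolve, timeoutMs))      /* resolves, never rejects */
            await Promise.race([ flush(), timeout ]) /* wait for the faster one */
        }
        catch (error) {
            console.warn(`flush error during close: ${error}`)
        }
    }

Note that Promise.race only stops the waiting: a flush that loses the race keeps running in the background, which is acceptable here because destroy() follows immediately.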
--- package/src/speechflow-node-a2a-wav.ts (1.3.1)
+++ package/src/speechflow-node-a2a-wav.ts (1.3.2)

@@ -103,8 +103,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
             decodeStrings: false,
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
-                if (!Buffer.isBuffer(chunk.payload))
+                if (!Buffer.isBuffer(chunk.payload)) {
                     callback(new Error("invalid chunk payload type"))
+                    return
+                }
                 else if (firstChunk) {
                     if (self.params.mode === "encode") {
                         /* convert raw/PCM to WAV/PCM
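This hunk and several below share one root cause: in Node.js callback-style stream code, callback(new Error(...)) only reports the error, it does not end the function, so without an explicit return execution falls through into the success path. A distilled illustration (hypothetical validation, not SpeechFlow code):

    import { Transform, TransformCallback } from "node:stream"

    const passNonEmpty = new Transform({
        transform (chunk: Buffer, _encoding, callback: TransformCallback) {
            if (chunk.length === 0) {
                callback(new Error("empty chunk"))
                return /* without this, push() below would still run */
            }
            this.push(chunk)
            callback()
        }
    })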
@@ -127,6 +129,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
                     }
                     else if (self.params.mode === "decode") {
                         /* convert WAV/PCM to raw/PCM */
+                        if (chunk.payload.length < 44) {
+                            callback(new Error("WAV header too short, expected at least 44 bytes"))
+                            return
+                        }
                         const header = readWavHeader(chunk.payload)
                         self.log("info", "WAV audio stream: " +
                             `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
@@ -134,20 +140,30 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
                             `channels=${header.channels} ` +
                             `sampleRate=${header.sampleRate} ` +
                             `bitDepth=${header.bitDepth}`)
-                        if (header.audioFormat !== 0x0001 /* PCM */)
-                        … (3 deleted lines not rendered)
-                        if (header.…
-                        … (3 deleted lines not rendered)
+                        if (header.audioFormat !== 0x0001 /* PCM */) {
+                            callback(new Error("WAV not based on PCM format"))
+                            return
+                        }
+                        if (header.bitDepth !== self.config.audioBitDepth) {
+                            callback(new Error(`WAV not based on ${self.config.audioBitDepth} bit samples`))
+                            return
+                        }
+                        if (header.sampleRate !== self.config.audioSampleRate) {
+                            callback(new Error(`WAV not based on ${self.config.audioSampleRate}Hz sample rate`))
+                            return
+                        }
+                        if (header.channels !== self.config.audioChannels) {
+                            callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
+                            return
+                        }
                         chunk.payload = chunk.payload.subarray(44)
                         this.push(chunk)
                         callback()
                     }
-                    else
-                        … (deleted line not rendered)
+                    else {
+                        callback(new Error(`invalid operation mode "${self.params.mode}"`))
+                        return
+                    }
                 }
                 else {
                     /* pass-through original chunk */
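The decode path assumes the canonical 44-byte RIFF/WAVE header before handing the payload to readWavHeader, whose implementation this diff does not show. A minimal reader for exactly the four fields validated above might look like this (an illustrative sketch under that canonical-header assumption, not the package's actual code):

    /* illustrative reader for a canonical 44-byte RIFF/WAVE header */
    function readWavHeaderSketch (buf: Buffer) {
        if (buf.length < 44)
            throw new Error("WAV header too short, expected at least 44 bytes")
        if (buf.toString("ascii", 0, 4) !== "RIFF" || buf.toString("ascii", 8, 12) !== "WAVE")
            throw new Error("not a RIFF/WAVE stream")
        return {
            audioFormat: buf.readUInt16LE(20), /* 0x0001 = PCM */
            channels:    buf.readUInt16LE(22),
            sampleRate:  buf.readUInt32LE(24),
            bitDepth:    buf.readUInt16LE(34)
        }
    }

The fixed subarray(44) offset used above only holds for such canonical writers; WAV files with extra chunks before the data chunk place their samples later in the stream.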