speechflow 1.3.0 → 1.3.2

This diff covers the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +165 -22
  3. package/dst/speechflow-node-a2a-gender.d.ts +2 -0
  4. package/dst/speechflow-node-a2a-gender.js +137 -59
  5. package/dst/speechflow-node-a2a-gender.js.map +1 -1
  6. package/dst/speechflow-node-a2a-meter.d.ts +3 -1
  7. package/dst/speechflow-node-a2a-meter.js +79 -35
  8. package/dst/speechflow-node-a2a-meter.js.map +1 -1
  9. package/dst/speechflow-node-a2a-mute.d.ts +1 -0
  10. package/dst/speechflow-node-a2a-mute.js +37 -11
  11. package/dst/speechflow-node-a2a-mute.js.map +1 -1
  12. package/dst/speechflow-node-a2a-vad.d.ts +3 -0
  13. package/dst/speechflow-node-a2a-vad.js +194 -96
  14. package/dst/speechflow-node-a2a-vad.js.map +1 -1
  15. package/dst/speechflow-node-a2a-wav.js +27 -11
  16. package/dst/speechflow-node-a2a-wav.js.map +1 -1
  17. package/dst/speechflow-node-a2t-deepgram.d.ts +4 -0
  18. package/dst/speechflow-node-a2t-deepgram.js +141 -43
  19. package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  20. package/dst/speechflow-node-t2a-elevenlabs.d.ts +2 -0
  21. package/dst/speechflow-node-t2a-elevenlabs.js +61 -12
  22. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  23. package/dst/speechflow-node-t2a-kokoro.d.ts +1 -0
  24. package/dst/speechflow-node-t2a-kokoro.js +10 -4
  25. package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  26. package/dst/speechflow-node-t2t-deepl.js +8 -4
  27. package/dst/speechflow-node-t2t-deepl.js.map +1 -1
  28. package/dst/speechflow-node-t2t-format.js +2 -2
  29. package/dst/speechflow-node-t2t-format.js.map +1 -1
  30. package/dst/speechflow-node-t2t-ollama.js +1 -1
  31. package/dst/speechflow-node-t2t-ollama.js.map +1 -1
  32. package/dst/speechflow-node-t2t-openai.js +1 -1
  33. package/dst/speechflow-node-t2t-openai.js.map +1 -1
  34. package/dst/speechflow-node-t2t-sentence.d.ts +1 -1
  35. package/dst/speechflow-node-t2t-sentence.js +35 -24
  36. package/dst/speechflow-node-t2t-sentence.js.map +1 -1
  37. package/dst/speechflow-node-t2t-subtitle.js +85 -17
  38. package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  39. package/dst/speechflow-node-t2t-transformers.js +2 -2
  40. package/dst/speechflow-node-t2t-transformers.js.map +1 -1
  41. package/dst/speechflow-node-x2x-filter.js +4 -4
  42. package/dst/speechflow-node-x2x-trace.js +1 -1
  43. package/dst/speechflow-node-x2x-trace.js.map +1 -1
  44. package/dst/speechflow-node-xio-device.js +12 -8
  45. package/dst/speechflow-node-xio-device.js.map +1 -1
  46. package/dst/speechflow-node-xio-file.js +9 -3
  47. package/dst/speechflow-node-xio-file.js.map +1 -1
  48. package/dst/speechflow-node-xio-mqtt.js +5 -2
  49. package/dst/speechflow-node-xio-mqtt.js.map +1 -1
  50. package/dst/speechflow-node-xio-websocket.js +11 -11
  51. package/dst/speechflow-node-xio-websocket.js.map +1 -1
  52. package/dst/speechflow-utils.d.ts +5 -0
  53. package/dst/speechflow-utils.js +77 -44
  54. package/dst/speechflow-utils.js.map +1 -1
  55. package/dst/speechflow.js +104 -34
  56. package/dst/speechflow.js.map +1 -1
  57. package/etc/eslint.mjs +1 -2
  58. package/etc/speechflow.yaml +18 -7
  59. package/etc/stx.conf +3 -3
  60. package/package.json +14 -13
  61. package/src/speechflow-node-a2a-gender.ts +148 -64
  62. package/src/speechflow-node-a2a-meter.ts +87 -40
  63. package/src/speechflow-node-a2a-mute.ts +39 -11
  64. package/src/speechflow-node-a2a-vad.ts +206 -100
  65. package/src/speechflow-node-a2a-wav.ts +27 -11
  66. package/src/speechflow-node-a2t-deepgram.ts +148 -45
  67. package/src/speechflow-node-t2a-elevenlabs.ts +65 -12
  68. package/src/speechflow-node-t2a-kokoro.ts +11 -4
  69. package/src/speechflow-node-t2t-deepl.ts +9 -4
  70. package/src/speechflow-node-t2t-format.ts +2 -2
  71. package/src/speechflow-node-t2t-ollama.ts +1 -1
  72. package/src/speechflow-node-t2t-openai.ts +1 -1
  73. package/src/speechflow-node-t2t-sentence.ts +38 -27
  74. package/src/speechflow-node-t2t-subtitle.ts +62 -15
  75. package/src/speechflow-node-t2t-transformers.ts +4 -3
  76. package/src/speechflow-node-x2x-filter.ts +4 -4
  77. package/src/speechflow-node-x2x-trace.ts +1 -1
  78. package/src/speechflow-node-xio-device.ts +12 -8
  79. package/src/speechflow-node-xio-file.ts +9 -3
  80. package/src/speechflow-node-xio-mqtt.ts +5 -2
  81. package/src/speechflow-node-xio-websocket.ts +12 -12
  82. package/src/speechflow-utils.ts +78 -44
  83. package/src/speechflow.ts +117 -36
package/src/speechflow-node-a2a-mute.ts

@@ -23,6 +23,7 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
 
     /* internal state */
     private muteMode: MuteMode = "none"
+    private destroyed = false
 
     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -38,25 +39,40 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
 
     /* receive external request */
     async receiveRequest (params: any[]) {
-        if (params.length === 2 && params[0] === "mode") {
-            if (!params[1].match(/^(?:none|silenced|unplugged)$/))
-                throw new Error("mute: invalid mode argument in external request")
-            const muteMode: MuteMode = params[1] as MuteMode
-            this.setMuteMode(muteMode)
-            this.sendResponse([ "mute", "mode", muteMode ])
+        if (this.destroyed)
+            throw new Error("mute: node already destroyed")
+        try {
+            if (params.length === 2 && params[0] === "mode") {
+                if (!params[1].match(/^(?:none|silenced|unplugged)$/))
+                    throw new Error("mute: invalid mode argument in external request")
+                const muteMode: MuteMode = params[1] as MuteMode
+                this.setMuteMode(muteMode)
+                this.sendResponse([ "mute", "mode", muteMode ])
+            }
+            else
+                throw new Error("mute: invalid arguments in external request")
+        }
+        catch (error) {
+            this.log("error", `receive request error: ${error}`)
+            throw error
         }
-        else
-            throw new Error("mute: invalid arguments in external request")
     }
 
     /* change mute mode */
     setMuteMode (mode: MuteMode) {
+        if (this.destroyed) {
+            this.log("warning", "attempted to set mute mode on destroyed node")
+            return
+        }
         this.log("info", `setting mute mode to "${mode}"`)
         this.muteMode = mode
     }
 
     /* open node */
     async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
         /* establish a transform stream */
         const self = this
         this.stream = new Stream.Transform({
@@ -64,6 +80,10 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
             writableObjectMode: true,
             decodeStrings: false,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
                 if (!Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
                 else if (self.muteMode === "unplugged")
@@ -71,10 +91,11 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
                     callback()
                 else if (self.muteMode === "silenced") {
                     /* pass-through a silenced chunk */
-                    chunk = chunk.clone()
-                    chunk.meta.set("muted", true)
-                    const buffer = chunk.payload as Buffer
+                    const chunkSilenced = chunk.clone()
+                    chunkSilenced.meta.set("muted", true)
+                    const buffer = chunkSilenced.payload as Buffer
                     buffer.fill(0)
+                    this.push(chunkSilenced)
                     callback()
                 }
                 else {
@@ -84,6 +105,10 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
                 }
             },
             final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
                 this.push(null)
                 callback()
             }
@@ -92,6 +117,9 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
 
     /* close node */
     async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
         /* close stream */
         if (this.stream !== null) {
             this.stream.destroy()
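
The mute-node changes above are one instance of a pattern applied across the 1.3.2 nodes: a "destroyed" flag that is cleared in open(), set in close(), and checked at every asynchronous entry point (stream callbacks, external requests), so that late callbacks cannot touch a torn-down node. A minimal self-contained sketch of that pattern, outside the SpeechFlow API (class and member names here are illustrative only):

import Stream from "node:stream"

class GuardedNode {
    private destroyed = false
    private stream: Stream.Transform | null = null

    open () {
        this.destroyed = false  /* allow re-opening after a previous close */
        const self = this
        this.stream = new Stream.Transform({
            objectMode: true,
            transform (chunk, _encoding, callback) {
                /* fail fast instead of touching torn-down resources */
                if (self.destroyed) {
                    callback(new Error("stream already destroyed"))
                    return
                }
                this.push(chunk)
                callback()
            },
            final (callback) {
                /* on a destroyed node just acknowledge, do not push EOF */
                if (self.destroyed) {
                    callback()
                    return
                }
                this.push(null)
                callback()
            }
        })
    }

    close () {
        this.destroyed = true   /* set first, so in-flight callbacks see it */
        this.stream?.destroy()
        this.stream = null
    }
}

Setting the flag before destroying the stream is the important ordering: any callback already scheduled on the event loop observes destroyed === true and bails out instead of racing the teardown.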
package/src/speechflow-node-a2a-vad.ts

@@ -40,6 +40,9 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
     private queueRecv = this.queue.pointerUse("recv")
     private queueVAD = this.queue.pointerUse("vad")
     private queueSend = this.queue.pointerUse("send")
+    private destroyed = false
+    private tailTimer: ReturnType<typeof setTimeout> | null = null
+    private activeEventListeners = new Set<() => void>()
 
     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -67,8 +70,8 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
             throw new Error("VAD node currently supports PCM-S16LE audio only")
 
-        /* pass-through logging */
-        const log = (level: string, msg: string) => { this.log(level, msg) }
+        /* clear destruction flag */
+        this.destroyed = false
 
         /* internal processing constants */
         const vadSampleRateTarget = 16000 /* internal target of VAD */
@@ -76,75 +79,101 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
 
         /* establish Voice Activity Detection (VAD) facility */
         let tail = false
-        let tailTimer: ReturnType<typeof setTimeout> | null = null
-        this.vad = await RealTimeVAD.new({
-            model: "v5",
-            sampleRate: this.config.audioSampleRate, /* before resampling to 16KHz */
-            frameSamples: vadSamplesPerFrame, /* after resampling to 16KHz */
-            positiveSpeechThreshold: this.params.posSpeechThreshold,
-            negativeSpeechThreshold: this.params.negSpeechThreshold,
-            minSpeechFrames: this.params.minSpeechFrames,
-            redemptionFrames: this.params.redemptionFrames,
-            preSpeechPadFrames: this.params.preSpeechPadFrames,
-            onSpeechStart: () => {
-                log("info", "VAD: speech start")
-                if (this.params.mode === "unlugged") {
-                    tail = false
-                    if (tailTimer !== null) {
-                        clearTimeout(tailTimer)
-                        tailTimer = null
-                    }
-                }
-            },
-            onSpeechEnd: (audio) => {
-                const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
-                log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
-                if (this.params.mode === "unlugged") {
-                    tail = true
-                    if (tailTimer !== null)
-                        clearTimeout(tailTimer)
-                    tailTimer = setTimeout(() => {
+        try {
+            this.vad = await RealTimeVAD.new({
+                model: "v5",
+                sampleRate: this.config.audioSampleRate, /* before resampling to 16KHz */
+                frameSamples: vadSamplesPerFrame, /* after resampling to 16KHz */
+                positiveSpeechThreshold: this.params.posSpeechThreshold,
+                negativeSpeechThreshold: this.params.negSpeechThreshold,
+                minSpeechFrames: this.params.minSpeechFrames,
+                redemptionFrames: this.params.redemptionFrames,
+                preSpeechPadFrames: this.params.preSpeechPadFrames,
+                onSpeechStart: () => {
+                    if (this.destroyed)
+                        return
+                    this.log("info", "VAD: speech start")
+                    if (this.params.mode === "unplugged") {
                         tail = false
-                        tailTimer = null
-                    }, this.params.postSpeechTail)
-                }
-            },
-            onVADMisfire: () => {
-                log("info", "VAD: speech end (segment too short)")
-                if (this.params.mode === "unlugged") {
-                    tail = true
-                    if (tailTimer !== null)
-                        clearTimeout(tailTimer)
-                    tailTimer = setTimeout(() => {
-                        tail = false
-                        tailTimer = null
-                    }, this.params.postSpeechTail)
-                }
-            },
-            onFrameProcessed: (audio) => {
-                /* annotate the current audio segment */
-                const element = this.queueVAD.peek()
-                if (element === undefined || element.type !== "audio-frame")
-                    throw new Error("internal error which cannot happen: no more queued element")
-                const segment = element.segmentData[element.segmentIdx++]
-                segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
-
-                /* annotate the entire audio chunk */
-                if (element.segmentIdx >= element.segmentData.length) {
-                    let isSpeech = false
-                    for (const segment of element.segmentData) {
-                        if (segment.isSpeech) {
-                            isSpeech = true
-                            break
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
+                        }
+                    }
+                },
+                onSpeechEnd: (audio) => {
+                    if (this.destroyed)
+                        return
+                    const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
+                    this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
+                    if (this.params.mode === "unplugged") {
+                        tail = true
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
                         }
+                        this.tailTimer = setTimeout(() => {
+                            if (this.destroyed || this.tailTimer === null)
+                                return
+                            tail = false
+                            this.tailTimer = null
+                        }, this.params.postSpeechTail)
+                    }
+                },
+                onVADMisfire: () => {
+                    if (this.destroyed) return
+                    this.log("info", "VAD: speech end (segment too short)")
+                    if (this.params.mode === "unplugged") {
+                        tail = true
+                        if (this.tailTimer !== null) {
+                            clearTimeout(this.tailTimer)
+                            this.tailTimer = null
+                        }
+                        this.tailTimer = setTimeout(() => {
+                            if (this.destroyed || this.tailTimer === null)
+                                return
+                            tail = false
+                            this.tailTimer = null
+                        }, this.params.postSpeechTail)
+                    }
+                },
+                onFrameProcessed: (audio) => {
+                    if (this.destroyed)
+                        return
+                    try {
+                        /* annotate the current audio segment */
+                        const element = this.queueVAD.peek()
+                        if (element === undefined || element.type !== "audio-frame")
+                            throw new Error("internal error which cannot happen: no more queued element")
+                        if (element.segmentIdx >= element.segmentData.length)
+                            throw new Error("segment index out of bounds")
+                        const segment = element.segmentData[element.segmentIdx++]
+                        segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
+
+                        /* annotate the entire audio chunk */
+                        if (element.segmentIdx >= element.segmentData.length) {
+                            let isSpeech = false
+                            for (const segment of element.segmentData) {
+                                if (segment.isSpeech) {
+                                    isSpeech = true
+                                    break
+                                }
+                            }
+                            element.isSpeech = isSpeech
+                            this.queueVAD.touch()
+                            this.queueVAD.walk(+1)
+                        }
+                    }
+                    catch (error) {
+                        this.log("error", `VAD frame processing error: ${error}`)
                     }
-                    element.isSpeech = isSpeech
-                    this.queueVAD.touch()
-                    this.queueVAD.walk(+1)
                 }
-            }
-        })
-        this.vad.start()
+            })
+            this.vad.start()
+        }
+        catch (error) {
+            throw new Error(`failed to initialize VAD: ${error}`)
+        }
 
         /* provide Duplex stream and internally attach to VAD */
         const self = this
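
Beyond wrapping the VAD setup in try/catch, this hunk makes two fixes worth noting: the post-speech "tail" timer moves from a closure variable (tailTimer) to the instance field this.tailTimer, so close() can cancel it and its callback can re-check the destroyed flag if it fires after shutdown; and the old "unlugged" typo becomes "unplugged", so the mode checks in the speech callbacks actually match. A minimal sketch of the guarded-timer idea, with illustrative names (this is not the SpeechFlow API):

/* a "tail gate" that keeps audio passing briefly after speech ends,
   cancellable from close() because the timer lives on the instance */
class TailGate {
    private destroyed = false
    private tail = false
    private tailTimer: ReturnType<typeof setTimeout> | null = null

    /* speech started again: close the tail and cancel its timer */
    onSpeechStart () {
        this.tail = false
        if (this.tailTimer !== null) {
            clearTimeout(this.tailTimer)
            this.tailTimer = null
        }
    }

    /* speech ended: keep the gate open for postSpeechTail milliseconds */
    onSpeechEnd (postSpeechTail: number) {
        this.tail = true
        if (this.tailTimer !== null)
            clearTimeout(this.tailTimer)
        this.tailTimer = setTimeout(() => {
            if (this.destroyed)  /* timer fired after close() */
                return
            this.tail = false
            this.tailTimer = null
        }, postSpeechTail)
    }

    isOpen () {
        return this.tail
    }

    close () {
        this.destroyed = true
        if (this.tailTimer !== null) {
            clearTimeout(this.tailTimer)
            this.tailTimer = null
        }
    }
}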
@@ -156,47 +185,70 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
 
             /* receive audio chunk (writable side of stream) */
             write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
                 if (!Buffer.isBuffer(chunk.payload))
                     callback(new Error("expected audio input as Buffer chunks"))
                 else if (chunk.payload.byteLength === 0)
                     callback()
                 else {
-                    /* convert audio samples from PCM/I16 to PCM/F32 */
-                    const data = utils.convertBufToF32(chunk.payload, self.config.audioLittleEndian)
-
-                    /* segment audio samples as individual VAD-sized frames */
-                    const segmentData: AudioQueueElementSegment[] = []
-                    const chunkSize = vadSamplesPerFrame * (self.config.audioSampleRate / vadSampleRateTarget)
-                    const chunks = Math.trunc(data.length / chunkSize)
-                    for (let i = 0; i < chunks; i++) {
-                        const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
-                        const segment: AudioQueueElementSegment = { data: frame }
-                        segmentData.push(segment)
-                    }
-                    if ((chunks * chunkSize) < data.length) {
-                        const frame = new Float32Array(chunkSize)
-                        frame.fill(0)
-                        frame.set(data.slice(chunks * chunkSize, data.length))
-                        const segment: AudioQueueElementSegment = { data: frame }
-                        segmentData.push(segment)
-                    }
+                    try {
+                        /* convert audio samples from PCM/I16 to PCM/F32 */
+                        const data = utils.convertBufToF32(chunk.payload,
+                            self.config.audioLittleEndian)
 
-                    /* queue the results */
-                    self.queueRecv.append({
-                        type: "audio-frame", chunk,
-                        segmentIdx: 0, segmentData
-                    })
+                        /* segment audio samples as individual VAD-sized frames */
+                        const segmentData: AudioQueueElementSegment[] = []
+                        const chunkSize = vadSamplesPerFrame *
+                            (self.config.audioSampleRate / vadSampleRateTarget)
+                        const chunks = Math.trunc(data.length / chunkSize)
+                        for (let i = 0; i < chunks; i++) {
+                            const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
+                            const segment: AudioQueueElementSegment = { data: frame }
+                            segmentData.push(segment)
+                        }
+                        if ((chunks * chunkSize) < data.length) {
+                            const frame = new Float32Array(chunkSize)
+                            frame.fill(0)
+                            frame.set(data.slice(chunks * chunkSize, data.length))
+                            const segment: AudioQueueElementSegment = { data: frame }
+                            segmentData.push(segment)
+                        }
 
-                    /* push segments through Voice Activity Detection (VAD) */
-                    for (const segment of segmentData)
-                        self.vad!.processAudio(segment.data)
+                        /* queue the results */
+                        self.queueRecv.append({
+                            type: "audio-frame", chunk,
+                            segmentIdx: 0, segmentData
+                        })
 
-                    callback()
+                        /* push segments through Voice Activity Detection (VAD) */
+                        if (self.vad && !self.destroyed) {
+                            try {
+                                for (const segment of segmentData)
+                                    self.vad.processAudio(segment.data)
+                            }
+                            catch (error) {
+                                self.log("error", `VAD processAudio error: ${error}`)
+                            }
+                        }
+
+                        callback()
+                    }
+                    catch (error) {
+                        callback(error instanceof Error ? error : new Error("VAD processing failed"))
+                    }
                 }
             },
 
             /* receive no more audio chunks (writable side of stream) */
             final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+
                 /* signal end of file */
                 self.queueRecv.append({ type: "audio-eof" })
                 callback()
@@ -204,12 +256,26 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
 
             /* send audio chunk(s) (readable side of stream) */
             read (_size) {
+                if (self.destroyed) {
+                    this.push(null)
+                    return
+                }
+
                 /* try to perform read operation from scratch */
                 const tryToRead = () => {
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+
                     /* flush pending audio chunks */
                     const flushPendingChunks = () => {
                         let pushed = 0
                         while (true) {
+                            if (self.destroyed) {
+                                this.push(null)
+                                return
+                            }
                            const element = self.queueSend.peek()
                            if (element === undefined)
                                break
@@ -233,23 +299,33 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                                 this.push(chunk)
                                 pushed++
                             }
-                            else if (self.params.mode === "unplugged" && pushed === 0)
+                            else if (self.params.mode === "unplugged" && pushed === 0) {
                                 /* we have to await chunks now, as in unplugged
                                    mode we else would be never called again until
                                    we at least once push a new chunk as the result */
-                                tryToRead()
+                                setTimeout(() => {
+                                    if (self.destroyed)
+                                        return
+                                    tryToRead()
+                                }, 0)
+                                return
+                            }
                         }
                     }
 
                     /* await forthcoming audio chunks */
                     const awaitForthcomingChunks = () => {
+                        if (self.destroyed)
+                            return
                         const element = self.queueSend.peek()
                         if (element !== undefined
                             && element.type === "audio-frame"
                             && element.isSpeech !== undefined)
                             flushPendingChunks()
-                        else
+                        else if (!self.destroyed) {
                             self.queue.once("write", awaitForthcomingChunks)
+                            self.activeEventListeners.add(awaitForthcomingChunks)
+                        }
                     }
 
                     const element = self.queueSend.peek()
@@ -259,8 +335,10 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                         && element.type === "audio-frame"
                         && element.isSpeech !== undefined)
                         flushPendingChunks()
-                    else
+                    else if (!self.destroyed) {
                         self.queue.once("write", awaitForthcomingChunks)
+                        self.activeEventListeners.add(awaitForthcomingChunks)
+                    }
                 }
                 tryToRead()
             }
@@ -269,15 +347,43 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
 
     /* close node */
     async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* cleanup tail timer */
+        if (this.tailTimer !== null) {
+            clearTimeout(this.tailTimer)
+            this.tailTimer = null
+        }
+
+        /* remove all event listeners */
+        this.activeEventListeners.forEach((listener) => {
+            this.queue.removeListener("write", listener)
+        })
+        this.activeEventListeners.clear()
+
         /* close stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null
         }
 
+        /* cleanup queue pointers before closing VAD to prevent callback access */
+        this.queue.pointerDelete("recv")
+        this.queue.pointerDelete("vad")
+        this.queue.pointerDelete("send")
+
         /* close VAD */
         if (this.vad !== null) {
-            await this.vad.flush()
+            try {
+                const flushPromise = this.vad.flush()
+                const timeoutPromise = new Promise((resolve) =>
+                    setTimeout(resolve, 5000))
+                await Promise.race([ flushPromise, timeoutPromise ])
+            }
+            catch (error) {
+                this.log("warning", `VAD flush error during close: ${error}`)
+            }
             this.vad.destroy()
             this.vad = null
         }
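
Note the close() change at the end: instead of awaiting this.vad.flush() unconditionally, the flush is raced against a 5-second timer so a hung flush cannot stall shutdown. A generic sketch of that bounded-flush pattern (flushWithTimeout is an illustrative helper, not part of the package):

/* bound an async flush so a hung promise cannot stall shutdown */
async function flushWithTimeout (flush: () => Promise<void>, ms = 5000) {
    let timer: ReturnType<typeof setTimeout> | undefined
    const timeout = new Promise<void>((resolve) => {
        /* resolve (not reject) on timeout: a slow flush degrades to a no-op */
        timer = setTimeout(resolve, ms)
    })
    try {
        await Promise.race([ flush(), timeout ])
    }
    finally {
        /* unlike a bare Promise.race, clearing the timer avoids keeping
           the event loop alive for up to "ms" after a fast flush */
        if (timer !== undefined)
            clearTimeout(timer)
    }
}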
package/src/speechflow-node-a2a-wav.ts

@@ -103,8 +103,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
             decodeStrings: false,
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
-                if (!Buffer.isBuffer(chunk.payload))
+                if (!Buffer.isBuffer(chunk.payload)) {
                     callback(new Error("invalid chunk payload type"))
+                    return
+                }
                 else if (firstChunk) {
                     if (self.params.mode === "encode") {
                         /* convert raw/PCM to WAV/PCM
@@ -127,6 +129,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
                     }
                     else if (self.params.mode === "decode") {
                         /* convert WAV/PCM to raw/PCM */
+                        if (chunk.payload.length < 44) {
+                            callback(new Error("WAV header too short, expected at least 44 bytes"))
+                            return
+                        }
                         const header = readWavHeader(chunk.payload)
                         self.log("info", "WAV audio stream: " +
                             `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
@@ -134,20 +140,30 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
                             `channels=${header.channels} ` +
                             `sampleRate=${header.sampleRate} ` +
                             `bitDepth=${header.bitDepth}`)
-                        if (header.audioFormat !== 0x0001 /* PCM */)
-                            throw new Error("WAV not based on PCM format")
-                        if (header.bitDepth !== 16)
-                            throw new Error("WAV not based on 16 bit samples")
-                        if (header.sampleRate !== 48000)
-                            throw new Error("WAV not based on 48Khz sample rate")
-                        if (header.channels !== 1)
-                            throw new Error("WAV not based on mono channel")
+                        if (header.audioFormat !== 0x0001 /* PCM */) {
+                            callback(new Error("WAV not based on PCM format"))
+                            return
+                        }
+                        if (header.bitDepth !== self.config.audioBitDepth) {
+                            callback(new Error(`WAV not based on ${self.config.audioBitDepth} bit samples`))
+                            return
+                        }
+                        if (header.sampleRate !== self.config.audioSampleRate) {
+                            callback(new Error(`WAV not based on ${self.config.audioSampleRate}Hz sample rate`))
+                            return
+                        }
+                        if (header.channels !== self.config.audioChannels) {
+                            callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
+                            return
+                        }
                         chunk.payload = chunk.payload.subarray(44)
                         this.push(chunk)
                         callback()
                     }
-                    else
-                        throw new Error(`invalid operation mode "${self.params.mode}"`)
+                    else {
+                        callback(new Error(`invalid operation mode "${self.params.mode}"`))
+                        return
+                    }
                 }
                 else {
                     /* pass-through original chunk */
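
The decode path now rejects chunks shorter than the canonical 44-byte RIFF/WAVE header, routes all failures through callback(new Error(...)) instead of throwing out of the Transform callback, and validates bit depth, sample rate, and channel count against the node's configuration instead of the previously hard-coded 16 bit / 48000 Hz / mono values. For reference, a sketch of how the fields checked above could be parsed from such a header (illustrative code, not the package's readWavHeader):

/* parse the canonical 44-byte RIFF/WAVE header of a PCM stream */
function parseWavHeader (buf: Buffer) {
    if (buf.length < 44)
        throw new Error("WAV header too short, expected at least 44 bytes")
    if (buf.toString("ascii", 0, 4) !== "RIFF"
        || buf.toString("ascii", 8, 12) !== "WAVE")
        throw new Error("not a RIFF/WAVE stream")
    return {
        audioFormat: buf.readUInt16LE(20), /* 0x0001 = PCM */
        channels:    buf.readUInt16LE(22),
        sampleRate:  buf.readUInt32LE(24),
        bitDepth:    buf.readUInt16LE(34)
    }
}

Real-world WAV files may carry additional chunks before the data chunk, so the fixed 44-byte layout is a convention for canonical PCM files; against a node configured for PCM-S16LE mono at 48000 Hz, the checks above compare these fields to audioBitDepth 16, audioSampleRate 48000, and audioChannels 1.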