speechflow 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/CHANGELOG.md +23 -0
  2. package/etc/stx.conf +54 -58
  3. package/package.json +25 -106
  4. package/{etc → speechflow-cli/etc}/eslint.mjs +1 -2
  5. package/speechflow-cli/etc/stx.conf +77 -0
  6. package/speechflow-cli/package.json +116 -0
  7. package/{src → speechflow-cli/src}/speechflow-node-a2a-gender.ts +148 -64
  8. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +217 -0
  9. package/{src → speechflow-cli/src}/speechflow-node-a2a-mute.ts +39 -11
  10. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +384 -0
  11. package/{src → speechflow-cli/src}/speechflow-node-a2a-wav.ts +27 -11
  12. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +313 -0
  13. package/{src → speechflow-cli/src}/speechflow-node-t2a-elevenlabs.ts +59 -12
  14. package/{src → speechflow-cli/src}/speechflow-node-t2a-kokoro.ts +11 -4
  15. package/{src → speechflow-cli/src}/speechflow-node-t2t-deepl.ts +9 -4
  16. package/{src → speechflow-cli/src}/speechflow-node-t2t-format.ts +2 -2
  17. package/{src → speechflow-cli/src}/speechflow-node-t2t-ollama.ts +1 -1
  18. package/{src → speechflow-cli/src}/speechflow-node-t2t-openai.ts +1 -1
  19. package/{src → speechflow-cli/src}/speechflow-node-t2t-sentence.ts +37 -20
  20. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +276 -0
  21. package/{src → speechflow-cli/src}/speechflow-node-t2t-transformers.ts +4 -3
  22. package/{src → speechflow-cli/src}/speechflow-node-x2x-filter.ts +9 -5
  23. package/{src → speechflow-cli/src}/speechflow-node-x2x-trace.ts +16 -8
  24. package/{src → speechflow-cli/src}/speechflow-node-xio-device.ts +12 -8
  25. package/{src → speechflow-cli/src}/speechflow-node-xio-file.ts +9 -3
  26. package/{src → speechflow-cli/src}/speechflow-node-xio-mqtt.ts +5 -2
  27. package/{src → speechflow-cli/src}/speechflow-node-xio-websocket.ts +12 -12
  28. package/{src → speechflow-cli/src}/speechflow-node.ts +7 -0
  29. package/{src → speechflow-cli/src}/speechflow-utils.ts +78 -44
  30. package/{src → speechflow-cli/src}/speechflow.ts +188 -53
  31. package/speechflow-ui-db/etc/eslint.mjs +106 -0
  32. package/speechflow-ui-db/etc/htmllint.json +55 -0
  33. package/speechflow-ui-db/etc/stx.conf +79 -0
  34. package/speechflow-ui-db/etc/stylelint.js +46 -0
  35. package/speechflow-ui-db/etc/stylelint.yaml +33 -0
  36. package/speechflow-ui-db/etc/tsc-client.json +30 -0
  37. package/speechflow-ui-db/etc/tsc.node.json +9 -0
  38. package/speechflow-ui-db/etc/vite-client.mts +63 -0
  39. package/speechflow-ui-db/package.d/htmllint-cli+0.0.7.patch +20 -0
  40. package/speechflow-ui-db/package.json +75 -0
  41. package/speechflow-ui-db/src/app-icon.ai +1989 -4
  42. package/speechflow-ui-db/src/app-icon.svg +26 -0
  43. package/speechflow-ui-db/src/app.styl +64 -0
  44. package/speechflow-ui-db/src/app.vue +221 -0
  45. package/speechflow-ui-db/src/index.html +23 -0
  46. package/speechflow-ui-db/src/index.ts +26 -0
  47. package/{dst/speechflow.d.ts → speechflow-ui-db/src/lib.d.ts} +5 -3
  48. package/speechflow-ui-db/src/tsconfig.json +3 -0
  49. package/speechflow-ui-st/etc/eslint.mjs +106 -0
  50. package/speechflow-ui-st/etc/htmllint.json +55 -0
  51. package/speechflow-ui-st/etc/stx.conf +79 -0
  52. package/speechflow-ui-st/etc/stylelint.js +46 -0
  53. package/speechflow-ui-st/etc/stylelint.yaml +33 -0
  54. package/speechflow-ui-st/etc/tsc-client.json +30 -0
  55. package/speechflow-ui-st/etc/tsc.node.json +9 -0
  56. package/speechflow-ui-st/etc/vite-client.mts +63 -0
  57. package/speechflow-ui-st/package.d/htmllint-cli+0.0.7.patch +20 -0
  58. package/speechflow-ui-st/package.json +79 -0
  59. package/speechflow-ui-st/src/app-icon.ai +1989 -4
  60. package/speechflow-ui-st/src/app-icon.svg +26 -0
  61. package/speechflow-ui-st/src/app.styl +64 -0
  62. package/speechflow-ui-st/src/app.vue +142 -0
  63. package/speechflow-ui-st/src/index.html +23 -0
  64. package/speechflow-ui-st/src/index.ts +26 -0
  65. package/speechflow-ui-st/src/lib.d.ts +9 -0
  66. package/speechflow-ui-st/src/tsconfig.json +3 -0
  67. package/dst/speechflow-node-a2a-ffmpeg.d.ts +0 -13
  68. package/dst/speechflow-node-a2a-ffmpeg.js +0 -153
  69. package/dst/speechflow-node-a2a-ffmpeg.js.map +0 -1
  70. package/dst/speechflow-node-a2a-gender.d.ts +0 -18
  71. package/dst/speechflow-node-a2a-gender.js +0 -271
  72. package/dst/speechflow-node-a2a-gender.js.map +0 -1
  73. package/dst/speechflow-node-a2a-meter.d.ts +0 -12
  74. package/dst/speechflow-node-a2a-meter.js +0 -155
  75. package/dst/speechflow-node-a2a-meter.js.map +0 -1
  76. package/dst/speechflow-node-a2a-mute.d.ts +0 -16
  77. package/dst/speechflow-node-a2a-mute.js +0 -91
  78. package/dst/speechflow-node-a2a-mute.js.map +0 -1
  79. package/dst/speechflow-node-a2a-vad.d.ts +0 -16
  80. package/dst/speechflow-node-a2a-vad.js +0 -285
  81. package/dst/speechflow-node-a2a-vad.js.map +0 -1
  82. package/dst/speechflow-node-a2a-wav.d.ts +0 -11
  83. package/dst/speechflow-node-a2a-wav.js +0 -195
  84. package/dst/speechflow-node-a2a-wav.js.map +0 -1
  85. package/dst/speechflow-node-a2t-deepgram.d.ts +0 -15
  86. package/dst/speechflow-node-a2t-deepgram.js +0 -255
  87. package/dst/speechflow-node-a2t-deepgram.js.map +0 -1
  88. package/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -16
  89. package/dst/speechflow-node-t2a-elevenlabs.js +0 -195
  90. package/dst/speechflow-node-t2a-elevenlabs.js.map +0 -1
  91. package/dst/speechflow-node-t2a-kokoro.d.ts +0 -13
  92. package/dst/speechflow-node-t2a-kokoro.js +0 -149
  93. package/dst/speechflow-node-t2a-kokoro.js.map +0 -1
  94. package/dst/speechflow-node-t2t-deepl.d.ts +0 -15
  95. package/dst/speechflow-node-t2t-deepl.js +0 -142
  96. package/dst/speechflow-node-t2t-deepl.js.map +0 -1
  97. package/dst/speechflow-node-t2t-format.d.ts +0 -11
  98. package/dst/speechflow-node-t2t-format.js +0 -82
  99. package/dst/speechflow-node-t2t-format.js.map +0 -1
  100. package/dst/speechflow-node-t2t-ollama.d.ts +0 -13
  101. package/dst/speechflow-node-t2t-ollama.js +0 -247
  102. package/dst/speechflow-node-t2t-ollama.js.map +0 -1
  103. package/dst/speechflow-node-t2t-openai.d.ts +0 -13
  104. package/dst/speechflow-node-t2t-openai.js +0 -227
  105. package/dst/speechflow-node-t2t-openai.js.map +0 -1
  106. package/dst/speechflow-node-t2t-sentence.d.ts +0 -17
  107. package/dst/speechflow-node-t2t-sentence.js +0 -234
  108. package/dst/speechflow-node-t2t-sentence.js.map +0 -1
  109. package/dst/speechflow-node-t2t-subtitle.d.ts +0 -13
  110. package/dst/speechflow-node-t2t-subtitle.js +0 -278
  111. package/dst/speechflow-node-t2t-subtitle.js.map +0 -1
  112. package/dst/speechflow-node-t2t-transformers.d.ts +0 -14
  113. package/dst/speechflow-node-t2t-transformers.js +0 -265
  114. package/dst/speechflow-node-t2t-transformers.js.map +0 -1
  115. package/dst/speechflow-node-x2x-filter.d.ts +0 -11
  116. package/dst/speechflow-node-x2x-filter.js +0 -117
  117. package/dst/speechflow-node-x2x-filter.js.map +0 -1
  118. package/dst/speechflow-node-x2x-trace.d.ts +0 -11
  119. package/dst/speechflow-node-x2x-trace.js +0 -111
  120. package/dst/speechflow-node-x2x-trace.js.map +0 -1
  121. package/dst/speechflow-node-xio-device.d.ts +0 -13
  122. package/dst/speechflow-node-xio-device.js +0 -226
  123. package/dst/speechflow-node-xio-device.js.map +0 -1
  124. package/dst/speechflow-node-xio-file.d.ts +0 -11
  125. package/dst/speechflow-node-xio-file.js +0 -210
  126. package/dst/speechflow-node-xio-file.js.map +0 -1
  127. package/dst/speechflow-node-xio-mqtt.d.ts +0 -13
  128. package/dst/speechflow-node-xio-mqtt.js +0 -185
  129. package/dst/speechflow-node-xio-mqtt.js.map +0 -1
  130. package/dst/speechflow-node-xio-websocket.d.ts +0 -13
  131. package/dst/speechflow-node-xio-websocket.js +0 -278
  132. package/dst/speechflow-node-xio-websocket.js.map +0 -1
  133. package/dst/speechflow-node.d.ts +0 -65
  134. package/dst/speechflow-node.js +0 -180
  135. package/dst/speechflow-node.js.map +0 -1
  136. package/dst/speechflow-utils.d.ts +0 -69
  137. package/dst/speechflow-utils.js +0 -486
  138. package/dst/speechflow-utils.js.map +0 -1
  139. package/dst/speechflow.js +0 -768
  140. package/dst/speechflow.js.map +0 -1
  141. package/src/speechflow-node-a2a-meter.ts +0 -130
  142. package/src/speechflow-node-a2a-vad.ts +0 -285
  143. package/src/speechflow-node-a2t-deepgram.ts +0 -234
  144. package/src/speechflow-node-t2t-subtitle.ts +0 -149
  145. /package/{etc → speechflow-cli/etc}/biome.jsonc +0 -0
  146. /package/{etc → speechflow-cli/etc}/oxlint.jsonc +0 -0
  147. /package/{etc → speechflow-cli/etc}/speechflow.bat +0 -0
  148. /package/{etc → speechflow-cli/etc}/speechflow.sh +0 -0
  149. /package/{etc → speechflow-cli/etc}/speechflow.yaml +0 -0
  150. /package/{etc → speechflow-cli/etc}/tsconfig.json +0 -0
  151. /package/{package.d → speechflow-cli/package.d}/@ericedouard+vad-node-realtime+0.2.0.patch +0 -0
  152. /package/{src → speechflow-cli/src}/lib.d.ts +0 -0
  153. /package/{src → speechflow-cli/src}/speechflow-logo.ai +0 -0
  154. /package/{src → speechflow-cli/src}/speechflow-logo.svg +0 -0
  155. /package/{src → speechflow-cli/src}/speechflow-node-a2a-ffmpeg.ts +0 -0
  156. /package/{tsconfig.json → speechflow-cli/tsconfig.json} +0 -0
@@ -0,0 +1,384 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import { RealTimeVAD } from "@ericedouard/vad-node-realtime"
12
+
13
+ /* internal dependencies */
14
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
15
+ import * as utils from "./speechflow-utils"
16
+
17
+ /* audio stream queue element */
18
+ type AudioQueueElementSegment = {
19
+ data: Float32Array,
20
+ isSpeech?: boolean
21
+ }
22
+ type AudioQueueElement = {
23
+ type: "audio-frame",
24
+ chunk: SpeechFlowChunk,
25
+ segmentIdx: number,
26
+ segmentData: AudioQueueElementSegment[],
27
+ isSpeech?: boolean
28
+ } | {
29
+ type: "audio-eof"
30
+ }
31
+
32
+ /* SpeechFlow node for VAD speech-to-speech processing */
33
+ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
34
+ /* declare official node name */
35
+ public static name = "vad"
36
+
37
+ /* internal state */
38
+ private vad: RealTimeVAD | null = null
39
+ private queue = new utils.Queue<AudioQueueElement>()
40
+ private queueRecv = this.queue.pointerUse("recv")
41
+ private queueVAD = this.queue.pointerUse("vad")
42
+ private queueSend = this.queue.pointerUse("send")
43
+ private destroyed = false
44
+ private tailTimer: ReturnType<typeof setTimeout> | null = null
45
+ private activeEventListeners = new Set<() => void>()
46
+
47
+ /* construct node */
48
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
49
+ super(id, cfg, opts, args)
50
+
51
+ /* declare node configuration parameters */
52
+ this.configure({
53
+ mode: { type: "string", val: "silenced", match: /^(?:silenced|unplugged)$/ },
54
+ posSpeechThreshold: { type: "number", val: 0.50 },
55
+ negSpeechThreshold: { type: "number", val: 0.35 },
56
+ minSpeechFrames: { type: "number", val: 2 },
57
+ redemptionFrames: { type: "number", val: 12 },
58
+ preSpeechPadFrames: { type: "number", val: 1 },
59
+ postSpeechTail: { type: "number", val: 1500 }
60
+ })
61
+
62
+ /* declare node input/output format */
63
+ this.input = "audio"
64
+ this.output = "audio"
65
+ }
66
+
67
+ /* open node */
68
+ async open () {
69
+ /* sanity check situation */
70
+ if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
71
+ throw new Error("VAD node currently supports PCM-S16LE audio only")
72
+
73
+ /* clear destruction flag */
74
+ this.destroyed = false
75
+
76
+ /* internal processing constants */
77
+ const vadSampleRateTarget = 16000 /* internal target of VAD */
78
+ const vadSamplesPerFrame = 512 /* required for VAD v5 */
79
+
80
+ /* helper function for timer cleanup */
81
+ const clearTailTimer = () => {
82
+ if (this.tailTimer !== null) {
83
+ clearTimeout(this.tailTimer)
84
+ this.tailTimer = null
85
+ }
86
+ }
87
+
88
+ /* establish Voice Activity Detection (VAD) facility */
89
+ let tail = false
90
+ try {
91
+ this.vad = await RealTimeVAD.new({
92
+ model: "v5",
93
+ sampleRate: this.config.audioSampleRate, /* before resampling to 16KHz */
94
+ frameSamples: vadSamplesPerFrame, /* after resampling to 16KHz */
95
+ positiveSpeechThreshold: this.params.posSpeechThreshold,
96
+ negativeSpeechThreshold: this.params.negSpeechThreshold,
97
+ minSpeechFrames: this.params.minSpeechFrames,
98
+ redemptionFrames: this.params.redemptionFrames,
99
+ preSpeechPadFrames: this.params.preSpeechPadFrames,
100
+ onSpeechStart: () => {
101
+ if (this.destroyed)
102
+ return
103
+ this.log("info", "VAD: speech start")
104
+ if (this.params.mode === "unplugged") {
105
+ tail = false
106
+ clearTailTimer()
107
+ }
108
+ },
109
+ onSpeechEnd: (audio) => {
110
+ if (this.destroyed)
111
+ return
112
+ const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
113
+ this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
114
+ if (this.params.mode === "unplugged") {
115
+ tail = true
116
+ clearTailTimer()
117
+ this.tailTimer = setTimeout(() => {
118
+ if (this.destroyed || this.tailTimer === null)
119
+ return
120
+ tail = false
121
+ this.tailTimer = null
122
+ }, this.params.postSpeechTail)
123
+ }
124
+ },
125
+ onVADMisfire: () => {
126
+ if (this.destroyed)
127
+ return
128
+ this.log("info", "VAD: speech end (segment too short)")
129
+ if (this.params.mode === "unplugged") {
130
+ tail = true
131
+ clearTailTimer()
132
+ this.tailTimer = setTimeout(() => {
133
+ if (this.destroyed || this.tailTimer === null)
134
+ return
135
+ tail = false
136
+ this.tailTimer = null
137
+ }, this.params.postSpeechTail)
138
+ }
139
+ },
140
+ onFrameProcessed: (audio) => {
141
+ if (this.destroyed)
142
+ return
143
+ try {
144
+ /* annotate the current audio segment */
145
+ const element = this.queueVAD.peek()
146
+ if (element === undefined || element.type !== "audio-frame")
147
+ throw new Error("internal error which cannot happen: no more queued element")
148
+ if (element.segmentIdx >= element.segmentData.length)
149
+ throw new Error("segment index out of bounds")
150
+ const segment = element.segmentData[element.segmentIdx++]
151
+ segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
152
+
153
+ /* annotate the entire audio chunk */
154
+ if (element.segmentIdx >= element.segmentData.length) {
155
+ element.isSpeech = element.segmentData.some(segment => segment.isSpeech)
156
+ this.queueVAD.touch()
157
+ this.queueVAD.walk(+1)
158
+ }
159
+ }
160
+ catch (error) {
161
+ this.log("error", `VAD frame processing error: ${error}`)
162
+ }
163
+ }
164
+ })
165
+ this.vad.start()
166
+ }
167
+ catch (error) {
168
+ throw new Error(`failed to initialize VAD: ${error}`)
169
+ }
170
+
171
+ /* provide Duplex stream and internally attach to VAD */
172
+ const self = this
173
+ this.stream = new Stream.Duplex({
174
+ writableObjectMode: true,
175
+ readableObjectMode: true,
176
+ decodeStrings: false,
177
+ highWaterMark: 1,
178
+
179
+ /* receive audio chunk (writable side of stream) */
180
+ write (chunk: SpeechFlowChunk, encoding, callback) {
181
+ if (self.destroyed) {
182
+ callback(new Error("stream already destroyed"))
183
+ return
184
+ }
185
+ if (!Buffer.isBuffer(chunk.payload))
186
+ callback(new Error("expected audio input as Buffer chunks"))
187
+ else if (chunk.payload.byteLength === 0)
188
+ callback()
189
+ else {
190
+ try {
191
+ /* convert audio samples from PCM/I16 to PCM/F32 */
192
+ const data = utils.convertBufToF32(chunk.payload,
193
+ self.config.audioLittleEndian)
194
+
195
+ /* segment audio samples as individual VAD-sized frames */
196
+ const segmentData: AudioQueueElementSegment[] = []
197
+ const chunkSize = vadSamplesPerFrame *
198
+ (self.config.audioSampleRate / vadSampleRateTarget)
199
+ const chunks = Math.trunc(data.length / chunkSize)
200
+ for (let i = 0; i < chunks; i++) {
201
+ const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
202
+ const segment: AudioQueueElementSegment = { data: frame }
203
+ segmentData.push(segment)
204
+ }
205
+ if ((chunks * chunkSize) < data.length) {
206
+ const frame = new Float32Array(chunkSize)
207
+ frame.fill(0)
208
+ frame.set(data.slice(chunks * chunkSize, data.length))
209
+ const segment: AudioQueueElementSegment = { data: frame }
210
+ segmentData.push(segment)
211
+ }
212
+
213
+ /* queue the results */
214
+ self.queueRecv.append({
215
+ type: "audio-frame", chunk,
216
+ segmentIdx: 0, segmentData
217
+ })
218
+
219
+ /* push segments through Voice Activity Detection (VAD) */
220
+ if (self.vad && !self.destroyed) {
221
+ try {
222
+ for (const segment of segmentData)
223
+ self.vad.processAudio(segment.data)
224
+ }
225
+ catch (error) {
226
+ self.log("error", `VAD processAudio error: ${error}`)
227
+ }
228
+ }
229
+
230
+ callback()
231
+ }
232
+ catch (error) {
233
+ callback(error instanceof Error ? error : new Error("VAD processing failed"))
234
+ }
235
+ }
236
+ },
237
+
238
+ /* receive no more audio chunks (writable side of stream) */
239
+ final (callback) {
240
+ if (self.destroyed) {
241
+ callback()
242
+ return
243
+ }
244
+
245
+ /* signal end of file */
246
+ self.queueRecv.append({ type: "audio-eof" })
247
+ callback()
248
+ },
249
+
250
+ /* send audio chunk(s) (readable side of stream) */
251
+ read (_size) {
252
+ if (self.destroyed) {
253
+ this.push(null)
254
+ return
255
+ }
256
+
257
+ /* try to perform read operation from scratch */
258
+ const tryToRead = () => {
259
+ if (self.destroyed) {
260
+ this.push(null)
261
+ return
262
+ }
263
+
264
+ /* flush pending audio chunks */
265
+ const flushPendingChunks = () => {
266
+ let pushed = 0
267
+ while (true) {
268
+ if (self.destroyed) {
269
+ this.push(null)
270
+ return
271
+ }
272
+ const element = self.queueSend.peek()
273
+ if (element === undefined)
274
+ break
275
+ else if (element.type === "audio-eof") {
276
+ this.push(null)
277
+ break
278
+ }
279
+ else if (element.type === "audio-frame"
280
+ && element.isSpeech === undefined)
281
+ break
282
+ self.queueSend.walk(+1)
283
+ self.queue.trim()
284
+ if (element.isSpeech) {
285
+ this.push(element.chunk)
286
+ pushed++
287
+ }
288
+ else if (self.params.mode === "silenced") {
289
+ const chunk = element.chunk.clone()
290
+ const buffer = chunk.payload as Buffer
291
+ buffer.fill(0)
292
+ this.push(chunk)
293
+ pushed++
294
+ }
295
+ else if (self.params.mode === "unplugged" && pushed === 0) {
296
+ /* we have to await chunks now, as in unplugged
297
+ mode we else would be never called again until
298
+ we at least once push a new chunk as the result */
299
+ setTimeout(() => {
300
+ if (self.destroyed)
301
+ return
302
+ tryToRead()
303
+ }, 0)
304
+ return
305
+ }
306
+ }
307
+ }
308
+
309
+ /* await forthcoming audio chunks */
310
+ const awaitForthcomingChunks = () => {
311
+ if (self.destroyed)
312
+ return
313
+ const element = self.queueSend.peek()
314
+ if (element !== undefined
315
+ && element.type === "audio-frame"
316
+ && element.isSpeech !== undefined)
317
+ flushPendingChunks()
318
+ else if (!self.destroyed) {
319
+ self.queue.once("write", awaitForthcomingChunks)
320
+ self.activeEventListeners.add(awaitForthcomingChunks)
321
+ }
322
+ }
323
+
324
+ const element = self.queueSend.peek()
325
+ if (element !== undefined && element.type === "audio-eof")
326
+ this.push(null)
327
+ else if (element !== undefined
328
+ && element.type === "audio-frame"
329
+ && element.isSpeech !== undefined)
330
+ flushPendingChunks()
331
+ else if (!self.destroyed) {
332
+ self.queue.once("write", awaitForthcomingChunks)
333
+ self.activeEventListeners.add(awaitForthcomingChunks)
334
+ }
335
+ }
336
+ tryToRead()
337
+ }
338
+ })
339
+ }
340
+
341
+ /* close node */
342
+ async close () {
343
+ /* indicate destruction */
344
+ this.destroyed = true
345
+
346
+ /* cleanup tail timer */
347
+ if (this.tailTimer !== null) {
348
+ clearTimeout(this.tailTimer)
349
+ this.tailTimer = null
350
+ }
351
+
352
+ /* remove all event listeners */
353
+ this.activeEventListeners.forEach((listener) => {
354
+ this.queue.removeListener("write", listener)
355
+ })
356
+ this.activeEventListeners.clear()
357
+
358
+ /* close stream */
359
+ if (this.stream !== null) {
360
+ this.stream.destroy()
361
+ this.stream = null
362
+ }
363
+
364
+ /* cleanup queue pointers before closing VAD to prevent callback access */
365
+ this.queue.pointerDelete("recv")
366
+ this.queue.pointerDelete("vad")
367
+ this.queue.pointerDelete("send")
368
+
369
+ /* close VAD */
370
+ if (this.vad !== null) {
371
+ try {
372
+ const flushPromise = this.vad.flush()
373
+ const timeoutPromise = new Promise((resolve) =>
374
+ setTimeout(resolve, 5000))
375
+ await Promise.race([ flushPromise, timeoutPromise ])
376
+ }
377
+ catch (error) {
378
+ this.log("warning", `VAD flush error during close: ${error}`)
379
+ }
380
+ this.vad.destroy()
381
+ this.vad = null
382
+ }
383
+ }
384
+ }
@@ -103,8 +103,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
103
103
  decodeStrings: false,
104
104
  highWaterMark: 1,
105
105
  transform (chunk: SpeechFlowChunk, encoding, callback) {
106
- if (!Buffer.isBuffer(chunk.payload))
106
+ if (!Buffer.isBuffer(chunk.payload)) {
107
107
  callback(new Error("invalid chunk payload type"))
108
+ return
109
+ }
108
110
  else if (firstChunk) {
109
111
  if (self.params.mode === "encode") {
110
112
  /* convert raw/PCM to WAV/PCM
@@ -127,6 +129,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
127
129
  }
128
130
  else if (self.params.mode === "decode") {
129
131
  /* convert WAV/PCM to raw/PCM */
132
+ if (chunk.payload.length < 44) {
133
+ callback(new Error("WAV header too short, expected at least 44 bytes"))
134
+ return
135
+ }
130
136
  const header = readWavHeader(chunk.payload)
131
137
  self.log("info", "WAV audio stream: " +
132
138
  `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
@@ -134,20 +140,30 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
134
140
  `channels=${header.channels} ` +
135
141
  `sampleRate=${header.sampleRate} ` +
136
142
  `bitDepth=${header.bitDepth}`)
137
- if (header.audioFormat !== 0x0001 /* PCM */)
138
- throw new Error("WAV not based on PCM format")
139
- if (header.bitDepth !== 16)
140
- throw new Error("WAV not based on 16 bit samples")
141
- if (header.sampleRate !== 48000)
142
- throw new Error("WAV not based on 48Khz sample rate")
143
- if (header.channels !== 1)
144
- throw new Error("WAV not based on mono channel")
143
+ if (header.audioFormat !== 0x0001 /* PCM */) {
144
+ callback(new Error("WAV not based on PCM format"))
145
+ return
146
+ }
147
+ if (header.bitDepth !== self.config.audioBitDepth) {
148
+ callback(new Error(`WAV not based on ${self.config.audioBitDepth} bit samples`))
149
+ return
150
+ }
151
+ if (header.sampleRate !== self.config.audioSampleRate) {
152
+ callback(new Error(`WAV not based on ${self.config.audioSampleRate}Hz sample rate`))
153
+ return
154
+ }
155
+ if (header.channels !== self.config.audioChannels) {
156
+ callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
157
+ return
158
+ }
145
159
  chunk.payload = chunk.payload.subarray(44)
146
160
  this.push(chunk)
147
161
  callback()
148
162
  }
149
- else
150
- throw new Error(`invalid operation mode "${self.params.mode}"`)
163
+ else {
164
+ callback(new Error(`invalid operation mode "${self.params.mode}"`))
165
+ return
166
+ }
151
167
  }
152
168
  else {
153
169
  /* pass-through original chunk */