speechflow 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +46 -11
  3. package/dst/speechflow-node-a2a-gender.d.ts +17 -0
  4. package/dst/speechflow-node-a2a-gender.js +272 -0
  5. package/dst/speechflow-node-a2a-gender.js.map +1 -0
  6. package/dst/speechflow-node-a2a-meter.js +7 -3
  7. package/dst/speechflow-node-a2a-meter.js.map +1 -1
  8. package/dst/speechflow-node-a2a-mute.js +1 -0
  9. package/dst/speechflow-node-a2a-mute.js.map +1 -1
  10. package/dst/speechflow-node-a2a-vad.js +47 -63
  11. package/dst/speechflow-node-a2a-vad.js.map +1 -1
  12. package/dst/speechflow-node-a2a-wav.js +145 -122
  13. package/dst/speechflow-node-a2a-wav.js.map +1 -1
  14. package/dst/speechflow-node-a2t-deepgram.d.ts +3 -0
  15. package/dst/speechflow-node-a2t-deepgram.js +29 -4
  16. package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  17. package/dst/speechflow-node-t2a-elevenlabs.d.ts +3 -0
  18. package/dst/speechflow-node-t2a-elevenlabs.js +18 -6
  19. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  20. package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  21. package/dst/speechflow-node-t2t-deepl.d.ts +3 -0
  22. package/dst/speechflow-node-t2t-deepl.js +8 -1
  23. package/dst/speechflow-node-t2t-deepl.js.map +1 -1
  24. package/dst/speechflow-node-t2t-format.js.map +1 -1
  25. package/dst/speechflow-node-t2t-ollama.js.map +1 -1
  26. package/dst/speechflow-node-t2t-openai.js +1 -1
  27. package/dst/speechflow-node-t2t-openai.js.map +1 -1
  28. package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  29. package/dst/speechflow-node-t2t-transformers.js.map +1 -1
  30. package/dst/speechflow-node-x2x-filter.d.ts +11 -0
  31. package/dst/speechflow-node-x2x-filter.js +113 -0
  32. package/dst/speechflow-node-x2x-filter.js.map +1 -0
  33. package/dst/speechflow-node-x2x-trace.js +25 -11
  34. package/dst/speechflow-node-x2x-trace.js.map +1 -1
  35. package/dst/speechflow-node-xio-device.js +17 -6
  36. package/dst/speechflow-node-xio-device.js.map +1 -1
  37. package/dst/speechflow-node-xio-file.js +61 -28
  38. package/dst/speechflow-node-xio-file.js.map +1 -1
  39. package/dst/speechflow-node-xio-mqtt.js +7 -5
  40. package/dst/speechflow-node-xio-mqtt.js.map +1 -1
  41. package/dst/speechflow-node-xio-websocket.js +5 -5
  42. package/dst/speechflow-node-xio-websocket.js.map +1 -1
  43. package/dst/speechflow-node.d.ts +5 -1
  44. package/dst/speechflow-node.js +9 -2
  45. package/dst/speechflow-node.js.map +1 -1
  46. package/dst/speechflow-utils.d.ts +14 -1
  47. package/dst/speechflow-utils.js +110 -2
  48. package/dst/speechflow-utils.js.map +1 -1
  49. package/dst/speechflow.js +73 -14
  50. package/dst/speechflow.js.map +1 -1
  51. package/etc/speechflow.yaml +53 -26
  52. package/package.json +12 -10
  53. package/src/speechflow-node-a2a-gender.ts +272 -0
  54. package/src/speechflow-node-a2a-meter.ts +8 -4
  55. package/src/speechflow-node-a2a-mute.ts +1 -0
  56. package/src/speechflow-node-a2a-vad.ts +58 -68
  57. package/src/speechflow-node-a2a-wav.ts +128 -91
  58. package/src/speechflow-node-a2t-deepgram.ts +32 -5
  59. package/src/speechflow-node-t2a-elevenlabs.ts +21 -8
  60. package/src/speechflow-node-t2a-kokoro.ts +3 -3
  61. package/src/speechflow-node-t2t-deepl.ts +11 -3
  62. package/src/speechflow-node-t2t-format.ts +2 -2
  63. package/src/speechflow-node-t2t-ollama.ts +2 -2
  64. package/src/speechflow-node-t2t-openai.ts +3 -3
  65. package/src/speechflow-node-t2t-subtitle.ts +1 -1
  66. package/src/speechflow-node-t2t-transformers.ts +2 -2
  67. package/src/speechflow-node-x2x-filter.ts +122 -0
  68. package/src/speechflow-node-x2x-trace.ts +29 -12
  69. package/src/speechflow-node-xio-device.ts +24 -9
  70. package/src/speechflow-node-xio-file.ts +76 -36
  71. package/src/speechflow-node-xio-mqtt.ts +11 -9
  72. package/src/speechflow-node-xio-websocket.ts +7 -7
  73. package/src/speechflow-node.ts +11 -2
  74. package/src/speechflow-utils.ts +81 -2
  75. package/src/speechflow.ts +96 -35
package/package.json CHANGED
@@ -1,8 +1,8 @@
 {
     "name": "speechflow",
-    "version": "1.0.0",
-    "x-stdver": "1.0.0-GA",
-    "x-release": "2025-07-16",
+    "version": "1.2.0",
+    "x-stdver": "1.2.0-GA",
+    "x-release": "2025-07-21",
     "homepage": "https://github.com/rse/speechflow",
     "description": "Speech Processing Flow Graph",
     "license": "GPL-3.0-only",
@@ -26,7 +26,7 @@
     "@elevenlabs/elevenlabs-js": "2.6.0",
     "stream-transform": "3.4.0",
     "get-stream": "9.0.1",
-    "@dotenvx/dotenvx": "1.47.6",
+    "@dotenvx/dotenvx": "1.48.1",
     "speex-resampler": "3.0.1",
     "pcm-convert": "1.6.5",
     "object-path": "0.11.8",
@@ -39,13 +39,13 @@
     "hapi-plugin-websocket": "2.4.11",
     "@opensumi/reconnecting-websocket": "4.4.0",
     "ollama": "0.5.16",
-    "openai": "5.10.0",
+    "openai": "5.10.1",
     "@rse/ffmpeg": "1.4.2",
     "ffmpeg-stream": "1.0.1",
     "installed-packages": "1.0.13",
     "syspath": "1.0.8",
     "wav": "1.0.2",
-    "mqtt": "5.13.2",
+    "mqtt": "5.13.3",
     "cbor2": "2.0.1",
     "arktype": "2.1.20",
     "pure-uuid": "1.8.1",
@@ -55,7 +55,9 @@
     "kokoro-js": "1.2.1",
     "@ericedouard/vad-node-realtime": "0.2.0",
     "luxon": "3.7.1",
-    "wrap-text": "1.0.10"
+    "node-interval-tree": "2.1.2",
+    "wrap-text": "1.0.10",
+    "cli-table3": "0.6.5"
 },
 "devDependencies": {
     "eslint": "9.31.0",
@@ -66,12 +68,12 @@
     "eslint-plugin-node": "11.1.0",
     "@typescript-eslint/eslint-plugin": "8.37.0",
     "@typescript-eslint/parser": "8.37.0",
-    "oxlint": "1.6.0",
-    "eslint-plugin-oxlint": "1.6.0",
+    "oxlint": "1.7.0",
+    "eslint-plugin-oxlint": "1.7.0",
     "@biomejs/biome": "2.0.6",
     "eslint-config-biome": "1.9.4",
 
-    "@types/node": "24.0.14",
+    "@types/node": "24.0.15",
     "@types/yargs": "17.0.33",
     "@types/js-yaml": "4.0.9",
     "@types/object-path": "0.11.4",
package/src/speechflow-node-a2a-gender.ts ADDED
@@ -0,0 +1,272 @@
+/*
+**  SpeechFlow - Speech Processing Flow Graph
+**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/*  standard dependencies  */
+import path from "node:path"
+import Stream from "node:stream"
+
+/*  external dependencies  */
+import * as Transformers from "@huggingface/transformers"
+import { WaveFile } from "wavefile"
+
+/*  internal dependencies  */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/*  audio stream queue element  */
+type AudioQueueElement = {
+    type: "audio-frame",
+    chunk: SpeechFlowChunk,
+    data: Float32Array,
+    gender?: "male" | "female"
+} | {
+    type: "audio-eof"
+}
+
+/*  SpeechFlow node for Gender recognition  */
+export default class SpeechFlowNodeGender extends SpeechFlowNode {
+    /*  declare official node name  */
+    public static name = "gender"
+
+    /*  internal state  */
+    private static speexInitialized = false
+    private classifier: Transformers.AudioClassificationPipeline | null = null
+    private queue = new utils.Queue<AudioQueueElement>()
+    private queueRecv = this.queue.pointerUse("recv")
+    private queueAC = this.queue.pointerUse("ac")
+    private queueSend = this.queue.pointerUse("send")
+
+    /*  construct node  */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /*  declare node configuration parameters  */
+        this.configure({
+            window: { type: "number", pos: 0, val: 500 }
+        })
+
+        /*  declare node input/output format  */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /*  open node  */
+    async open () {
+        /*  sanity check situation  */
+        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+            throw new Error("Gender node currently supports PCM-S16LE audio only")
+
+        /*  pass-through logging  */
+        const log = (level: string, msg: string) => { this.log(level, msg) }
+
+        /*  the used model  */
+        const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
+
+        /*  track download progress when instantiating Transformers engine and model  */
+        const progressState = new Map<string, number>()
+        const progressCallback: Transformers.ProgressCallback = (progress: any) => {
+            let artifact = model
+            if (typeof progress.file === "string")
+                artifact += `:${progress.file}`
+            let percent = 0
+            if (typeof progress.loaded === "number" && typeof progress.total === "number")
+                percent = (progress.loaded as number / progress.total as number) * 100
+            else if (typeof progress.progress === "number")
+                percent = progress.progress
+            if (percent > 0)
+                progressState.set(artifact, percent)
+        }
+        const interval = setInterval(() => {
+            for (const [ artifact, percent ] of progressState) {
+                this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
+                if (percent >= 1.0)
+                    progressState.delete(artifact)
+            }
+        }, 1000)
+
+        /*  instantiate Transformers engine and model  */
+        const pipeline = Transformers.pipeline("audio-classification", model, {
+            cache_dir: path.join(this.config.cacheDir, "gender"),
+            dtype: "q4",
+            device: "auto",
+            progress_callback: progressCallback
+        })
+        this.classifier = await pipeline
+        clearInterval(interval)
+        if (this.classifier === null)
+            throw new Error("failed to instantiate classifier pipeline")
+
+        /*  classify a single large-enough concatenated audio frame  */
+        const classify = async (data: Float32Array) => {
+            const result = await this.classifier!(data)
+            const classified: Transformers.AudioClassificationOutput =
+                Array.isArray(result) ? result as Transformers.AudioClassificationOutput : [ result ]
+            const c1 = classified.find((c: any) => c.label === "male")
+            const c2 = classified.find((c: any) => c.label === "female")
+            const male = c1 ? c1.score : 0.0
+            const female = c2 ? c2.score : 0.0
+            return (male > female ? "male" : "female")
+        }
+
+        /*  work off queued audio frames  */
+        const frameWindowDuration = 0.5
+        const frameWindowSamples = frameWindowDuration * this.config.audioSampleRate
+        let lastGender = ""
+        let workingOffTimer: ReturnType<typeof setTimeout> | null = null
+        let workingOff = false
+        const workOffQueue = async () => {
+            /*  control working off round  */
+            if (workingOff)
+                return
+            workingOff = true
+            if (workingOffTimer !== null) {
+                clearTimeout(workingOffTimer)
+                workingOffTimer = null
+            }
+
+            let pos0 = this.queueAC.position()
+            const posL = this.queueAC.maxPosition()
+            const data = new Float32Array(frameWindowSamples)
+            data.fill(0)
+            let samples = 0
+            let pos = pos0
+            while (pos < posL && samples < frameWindowSamples) {
+                const element = this.queueAC.peek(pos)
+                if (element === undefined || element.type !== "audio-frame")
+                    break
+                if ((samples + element.data.length) < frameWindowSamples) {
+                    data.set(element.data, samples)
+                    samples += element.data.length
+                }
+                pos++
+            }
+            if (pos0 < pos && samples > frameWindowSamples * 0.75) {
+                const gender = await classify(data)
+                const posM = pos0 + Math.trunc((pos - pos0) * 0.25)
+                while (pos0 < posM && pos0 < posL) {
+                    const element = this.queueAC.peek(pos0)
+                    if (element === undefined || element.type !== "audio-frame")
+                        break
+                    element.gender = gender
+                    this.queueAC.touch()
+                    this.queueAC.walk(+1)
+                    pos0++
+                }
+                if (lastGender !== gender) {
+                    log("info", `gender now recognized as <${gender}>`)
+                    lastGender = gender
+                }
+            }
+
+            /*  re-initiate working off round  */
+            workingOff = false
+            workingOffTimer = setTimeout(() => { workOffQueue() }, 100)
+            this.queue.once("write", () => { workOffQueue() })
+        }
+        this.queue.once("write", () => { workOffQueue() })
+
+        /*  define sample rate required by model  */
+        const sampleRateTarget = 16000
+
+        /*  provide Duplex stream and internally attach to classifier  */
+        const self = this
+        this.stream = new Stream.Duplex({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+
+            /*  receive audio chunk (writable side of stream)  */
+            write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("expected audio input as Buffer chunks"))
+                else if (chunk.payload.byteLength === 0)
+                    callback()
+                else {
+                    /*  convert audio samples from PCM/I16/48KHz to PCM/F32/16KHz  */
+                    let data = utils.convertBufToF32(chunk.payload, self.config.audioLittleEndian)
+                    const wav = new WaveFile()
+                    wav.fromScratch(self.config.audioChannels, self.config.audioSampleRate, "32f", data)
+                    wav.toSampleRate(sampleRateTarget, { method: "cubic" })
+                    data = wav.getSamples(false, Float32Array<ArrayBuffer>) as
+                        any as Float32Array<ArrayBuffer>
+
+                    /*  queue chunk and converted data  */
+                    self.queueRecv.append({ type: "audio-frame", chunk, data })
+
+                    callback()
+                }
+            },
+
+            /*  receive no more audio chunks (writable side of stream)  */
+            final (callback) {
+                /*  signal end of file  */
+                self.queueRecv.append({ type: "audio-eof" })
+                callback()
+            },
+
+            /*  send audio chunk(s) (readable side of stream)  */
+            read (_size) {
+                /*  flush pending audio chunks  */
+                const flushPendingChunks = () => {
+                    while (true) {
+                        const element = self.queueSend.peek()
+                        if (element === undefined)
+                            break
+                        else if (element.type === "audio-eof") {
+                            this.push(null)
+                            break
+                        }
+                        else if (element.type === "audio-frame"
+                            && element.gender === undefined)
+                            break
+                        const duration = utils.audioArrayDuration(element.data)
+                        log("info", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
+                        element.chunk.meta.set("gender", element.gender)
+                        this.push(element.chunk)
+                        self.queueSend.walk(+1)
+                        self.queue.trim()
+                    }
+                }
+
+                /*  await forthcoming audio chunks  */
+                const awaitForthcomingChunks = () => {
+                    const element = self.queueSend.peek()
+                    if (element !== undefined
+                        && element.type === "audio-frame"
+                        && element.gender !== undefined)
+                        flushPendingChunks()
+                    else
+                        self.queue.once("write", awaitForthcomingChunks)
+                }
+
+                const element = self.queueSend.peek()
+                if (element !== undefined && element.type === "audio-eof")
+                    this.push(null)
+                else if (element !== undefined
+                    && element.type === "audio-frame"
+                    && element.gender !== undefined)
+                    flushPendingChunks()
+                else
+                    self.queue.once("write", awaitForthcomingChunks)
+            }
+        })
+    }
+
+    /*  close node  */
+    async close () {
+        /*  close stream  */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /*  close classifier  */
+        if (this.classifier !== null) {
+            this.classifier.dispose()
+            this.classifier = null
+        }
+    }
+}
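
Review note on the queue plumbing above: the new gender node drives three cursors over one shared utils.Queue. The "recv" pointer appends resampled frames from the writable side, the "ac" pointer walks forward annotating roughly half-second windows with a classified gender, and the "send" pointer flushes chunks downstream only once they carry an annotation, with queue.trim() reclaiming elements all pointers have passed. The utils.Queue implementation itself is not part of this hunk (it lives in speechflow-utils.ts, +81 -2 in the file list), so the following TypeScript is only a minimal sketch of the multi-pointer contract those call sites imply; all internals are assumptions.

    import { EventEmitter } from "node:events"

    /*  hypothetical stand-in for SpeechFlow's utils.Queue: one element
        store observed through independently advancing named pointers  */
    class MultiPointerQueue<T> extends EventEmitter {
        private elements: T[] = []
        private pointers = new Map<string, number>()

        /*  create a named pointer, starting at the head of the queue  */
        pointerUse (name: string) {
            this.pointers.set(name, 0)
            const self = this
            return {
                append (element: T)  { self.elements.push(element); self.emit("write") },
                peek (pos?: number)  { return self.elements[pos ?? self.pointers.get(name)!] },
                walk (delta: number) { self.pointers.set(name, self.pointers.get(name)! + delta) },
                touch ()             { self.emit("write") }, /*  announce in-place element update  */
                position ()          { return self.pointers.get(name)! },
                maxPosition ()       { return self.elements.length }
            }
        }

        /*  drop elements which every pointer has already walked past  */
        trim () {
            if (this.pointers.size === 0)
                return
            const min = Math.min(...this.pointers.values())
            if (min > 0) {
                this.elements.splice(0, min)
                for (const [ name, pos ] of this.pointers)
                    this.pointers.set(name, pos - min)
            }
        }
    }
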
package/src/speechflow-node-a2a-meter.ts CHANGED
@@ -5,7 +5,7 @@
 */
 
 /*  standard dependencies  */
-import Stream from "node:stream"
+import Stream from "node:stream"
 
 /*  external dependencies  */
 import { getLUFS, getRMS, AudioData } from "audio-inspect"
@@ -28,7 +28,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
 
         /*  declare node configuration parameters  */
         this.configure({
-            interval: { type: "number", val: 250 }
+            interval: { type: "number", pos: 0, val: 250 }
        })
 
        /*  declare node input/output format  */
@@ -40,7 +40,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
    async open () {
        /*  sanity check situation  */
        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
-           throw new Error("VAD node currently supports PCM-S16LE audio only")
+           throw new Error("meter node currently supports PCM-S16LE audio only")
 
        /*  internal state  */
        const sampleWindowDuration = 3 /* LUFS-S requires 3s */
@@ -50,7 +50,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
        let lufss = 0
        let rms = 0
 
-       /*  setup loundess emitting interval  */
+       /*  setup loudness emitting interval  */
        this.interval = setInterval(() => {
            this.log("info", `LUFS-S: ${lufss.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
            this.sendResponse([ "meter", "LUFS-S", lufss ])
@@ -104,6 +104,10 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
                    this.push(chunk)
                    callback()
                }
+           },
+           final (callback) {
+               this.push(null)
+               callback()
            }
        })
    }
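
The added final() handler closes a gap: a Node.js Duplex does not automatically end its readable side when its writable side finishes, so a meter node in the middle of a pipeline would previously swallow end-of-stream. A minimal standalone illustration of the pattern (plain Node.js stream API, not SpeechFlow code):

    import Stream from "node:stream"

    /*  minimal pass-through Duplex in object mode: without the final()
        handler, ending the writable side would never end the readable
        side and downstream consumers would wait forever  */
    const passThrough = new Stream.Duplex({
        writableObjectMode: true,
        readableObjectMode: true,
        write (chunk, _encoding, callback) {
            this.push(chunk)   /*  forward chunk to the readable side  */
            callback()
        },
        final (callback) {
            this.push(null)    /*  propagate end-of-stream downstream  */
            callback()
        },
        read () {}
    })

    passThrough.on("data", (chunk) => console.log("got:", chunk))
    passThrough.on("end",  ()      => console.log("done"))
    passThrough.write("a")
    passThrough.end()
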
package/src/speechflow-node-a2a-mute.ts CHANGED
@@ -72,6 +72,7 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
                else if (self.muteMode === "silenced") {
                    /*  pass-through a silenced chunk  */
                    chunk = chunk.clone()
+                   chunk.meta.set("muted", true)
                    const buffer = chunk.payload as Buffer
                    buffer.fill(0)
                    callback()
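
With the new "muted" metadata flag, downstream nodes can distinguish chunks silenced by the mute node from genuinely silent input. A hedged sketch of such a check (SpeechFlowChunk.meta is assumed to be a Map, as the set() call above suggests):

    /*  hypothetical downstream predicate, not part of this diff  */
    type ChunkLike = { meta: Map<string, unknown> }
    const isMuted = (chunk: ChunkLike) => chunk.meta.get("muted") === true
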
package/src/speechflow-node-a2a-vad.ts CHANGED
@@ -9,19 +9,24 @@ import Stream from "node:stream"
 
 /*  external dependencies  */
 import { RealTimeVAD } from "@ericedouard/vad-node-realtime"
-import { Duration } from "luxon"
 
 /*  internal dependencies  */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 import * as utils from "./speechflow-utils"
 
 /*  audio stream queue element  */
+type AudioQueueElementSegment = {
+    data: Float32Array,
+    isSpeech?: boolean
+}
 type AudioQueueElement = {
-    type: "audio-frame",
-    chunk: SpeechFlowChunk,
-    isSpeech?: boolean
+    type: "audio-frame",
+    chunk: SpeechFlowChunk,
+    segmentIdx: number,
+    segmentData: AudioQueueElementSegment[],
+    isSpeech?: boolean
 } | {
-    type: "audio-eof"
+    type: "audio-eof"
 }
 
 /*  SpeechFlow node for VAD speech-to-speech processing  */
@@ -89,10 +94,22 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                log("info", "VAD: speech end (segment too short)")
            },
            onFrameProcessed: (audio) => {
-               /*  annotate the current audio frame  */
+               /*  annotate the current audio segment  */
                const element = this.queueVAD.peek()
-               if (element !== undefined && element.type === "audio-frame") {
-                   const isSpeech = audio.isSpeech > audio.notSpeech
+               if (element === undefined || element.type !== "audio-frame")
+                   throw new Error("internal error which cannot happen: no more queued element")
+               const segment = element.segmentData[element.segmentIdx++]
+               segment.isSpeech = (audio.isSpeech > audio.notSpeech)
+
+               /*  annotate the entire audio chunk  */
+               if (element.segmentIdx >= element.segmentData.length) {
+                   let isSpeech = false
+                   for (const segment of element.segmentData) {
+                       if (segment.isSpeech) {
+                           isSpeech = true
+                           break
+                       }
+                   }
                    element.isSpeech = isSpeech
                    this.queueVAD.touch()
                    this.queueVAD.walk(+1)
@@ -102,14 +119,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
        this.vad.start()
 
        /*  provide Duplex stream and internally attach to VAD  */
-       const vad = this.vad
-       const cfg = this.config
-       const queue = this.queue
-       const queueRecv = this.queueRecv
-       const queueSend = this.queueSend
-       const mode = this.params.mode
-       let carrySamples = new Float32Array()
-       let carryStart = Duration.fromDurationLike(0)
+       const self = this
        this.stream = new Stream.Duplex({
            writableObjectMode: true,
            readableObjectMode: true,
@@ -123,38 +133,34 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                    callback()
                else {
                    /*  convert audio samples from PCM/I16 to PCM/F32  */
-                   let data = utils.convertBufToF32(chunk.payload, cfg.audioLittleEndian)
-                   let start = chunk.timestampStart
-
-                   /*  merge previous carry samples  */
-                   if (carrySamples.length > 0) {
-                       start = carryStart
-                       const merged = new Float32Array(carrySamples.length + data.length)
-                       merged.set(carrySamples)
-                       merged.set(data, carrySamples.length)
-                       data = merged
-                       carrySamples = new Float32Array()
-                   }
+                   const data = utils.convertBufToF32(chunk.payload, self.config.audioLittleEndian)
 
-                   /*  queue audio samples as individual VAD-sized frames
-                       and in parallel send it into the Voice Activity Detection (VAD)  */
-                   const chunkSize = (vadSamplesPerFrame * (cfg.audioSampleRate / vadSampleRateTarget))
+                   /*  segment audio samples as individual VAD-sized frames  */
+                   const segmentData: AudioQueueElementSegment[] = []
+                   const chunkSize = vadSamplesPerFrame * (self.config.audioSampleRate / vadSampleRateTarget)
                    const chunks = Math.trunc(data.length / chunkSize)
                    for (let i = 0; i < chunks; i++) {
                        const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
-                       const buf = utils.convertF32ToBuf(frame)
-                       const duration = utils.audioBufferDuration(buf)
-                       const end = start.plus(duration)
-                       const chunk = new SpeechFlowChunk(start, end, "final", "audio", buf)
-                       queueRecv.append({ type: "audio-frame", chunk })
-                       vad.processAudio(frame)
-                       start = end
+                       const segment: AudioQueueElementSegment = { data: frame }
+                       segmentData.push(segment)
                    }
+                   if ((chunks * chunkSize) < data.length) {
+                       const frame = new Float32Array(chunkSize)
+                       frame.fill(0)
+                       frame.set(data.slice(chunks * chunkSize, data.length))
+                       const segment: AudioQueueElementSegment = { data: frame }
+                       segmentData.push(segment)
+                   }
+
+                   /*  queue the results  */
+                   self.queueRecv.append({
+                       type: "audio-frame", chunk,
+                       segmentIdx: 0, segmentData
+                   })
 
-                   /*  remember new carry samples  */
-                   const bulkLen = chunks * chunkSize
-                   carrySamples = data.slice(bulkLen)
-                   carryStart = start
+                   /*  push segments through Voice Activity Detection (VAD)  */
+                   for (const segment of segmentData)
+                       self.vad!.processAudio(segment.data)
 
                    callback()
                }
@@ -162,25 +168,8 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
 
            /*  receive no more audio chunks (writable side of stream)  */
            final (callback) {
-               /*  flush pending audio chunks  */
-               if (carrySamples.length > 0) {
-                   const chunkSize = (vadSamplesPerFrame * (cfg.audioSampleRate / vadSampleRateTarget))
-                   if (carrySamples.length < chunkSize) {
-                       const merged = new Float32Array(chunkSize)
-                       merged.set(carrySamples)
-                       merged.fill(0.0, carrySamples.length, chunkSize)
-                       carrySamples = merged
-                   }
-                   const buf = utils.convertF32ToBuf(carrySamples)
-                   const duration = utils.audioBufferDuration(buf)
-                   const end = carryStart.plus(duration)
-                   const chunk = new SpeechFlowChunk(carryStart, end, "final", "audio", buf)
-                   queueRecv.append({ type: "audio-frame", chunk })
-                   vad.processAudio(carrySamples)
-               }
-
                /*  signal end of file  */
-               queueRecv.append({ type: "audio-eof" })
+               self.queueRecv.append({ type: "audio-eof" })
                callback()
            },
 
@@ -192,7 +181,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                const flushPendingChunks = () => {
                    let pushed = 0
                    while (true) {
-                       const element = queueSend.peek()
+                       const element = self.queueSend.peek()
                        if (element === undefined)
                            break
                        else if (element.type === "audio-eof") {
@@ -202,19 +191,20 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                        else if (element.type === "audio-frame"
                            && element.isSpeech === undefined)
                            break
-                       queueSend.walk(+1)
+                       self.queueSend.walk(+1)
+                       self.queue.trim()
                        if (element.isSpeech) {
                            this.push(element.chunk)
                            pushed++
                        }
-                       else if (mode === "silenced") {
+                       else if (self.params.mode === "silenced") {
                            const chunk = element.chunk.clone()
                            const buffer = chunk.payload as Buffer
                            buffer.fill(0)
                            this.push(chunk)
                            pushed++
                        }
-                       else if (mode === "unplugged" && pushed === 0)
+                       else if (self.params.mode === "unplugged" && pushed === 0)
                            /*  we have to await chunks now, as in unplugged
                                mode we else would be never called again until
                                we at least once push a new chunk as the result  */
@@ -224,16 +214,16 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
 
                /*  await forthcoming audio chunks  */
                const awaitForthcomingChunks = () => {
-                   const element = queueSend.peek()
+                   const element = self.queueSend.peek()
                    if (element !== undefined
                        && element.type === "audio-frame"
                        && element.isSpeech !== undefined)
                        flushPendingChunks()
                    else
-                       queue.once("write", awaitForthcomingChunks)
+                       self.queue.once("write", awaitForthcomingChunks)
                }
 
-               const element = queueSend.peek()
+               const element = self.queueSend.peek()
                if (element !== undefined && element.type === "audio-eof")
                    this.push(null)
                else if (element !== undefined
@@ -241,7 +231,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
                    && element.isSpeech !== undefined)
                    flushPendingChunks()
                else
-                   queue.once("write", awaitForthcomingChunks)
+                   self.queue.once("write", awaitForthcomingChunks)
            }
            tryToRead()
        }
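
The net effect of this rewrite: instead of stitching carry-over samples across chunk boundaries (the removed carrySamples logic), each incoming chunk is now split on its own into VAD-sized frames, with the final partial frame zero-padded, so queue elements map 1:1 onto input chunks and a whole chunk can be marked as speech once all of its segments have been processed. A standalone sketch of that segmentation step (names mirror the diff; the function itself is illustrative, not part of the package):

    /*  split a chunk's samples into fixed-size VAD frames, zero-padding
        the last partial frame instead of carrying it into the next chunk
        (frameSize corresponds to
        vadSamplesPerFrame * (audioSampleRate / vadSampleRateTarget))  */
    function segmentSamples (data: Float32Array, frameSize: number): Float32Array[] {
        const segments: Float32Array[] = []
        const frames = Math.trunc(data.length / frameSize)
        for (let i = 0; i < frames; i++)
            segments.push(data.slice(i * frameSize, (i + 1) * frameSize))
        if (frames * frameSize < data.length) {
            const frame = new Float32Array(frameSize)  /*  zero-filled by default  */
            frame.set(data.slice(frames * frameSize))  /*  copy the partial tail   */
            segments.push(frame)
        }
        return segments
    }

    /*  e.g. with 48 kHz input and 512-sample VAD frames at 16 kHz:
        segmentSamples(data, 512 * (48000 / 16000)) yields 1536-sample frames  */
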