speechflow 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions as released to their respective public registries, and is provided for informational purposes only.
- package/CHANGELOG.md +23 -0
- package/etc/stx.conf +54 -58
- package/package.json +25 -106
- package/{etc → speechflow-cli/etc}/eslint.mjs +1 -2
- package/speechflow-cli/etc/stx.conf +77 -0
- package/speechflow-cli/package.json +116 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-gender.ts +148 -64
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +217 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-mute.ts +39 -11
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +384 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-wav.ts +27 -11
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +313 -0
- package/{src → speechflow-cli/src}/speechflow-node-t2a-elevenlabs.ts +59 -12
- package/{src → speechflow-cli/src}/speechflow-node-t2a-kokoro.ts +11 -4
- package/{src → speechflow-cli/src}/speechflow-node-t2t-deepl.ts +9 -4
- package/{src → speechflow-cli/src}/speechflow-node-t2t-format.ts +2 -2
- package/{src → speechflow-cli/src}/speechflow-node-t2t-ollama.ts +1 -1
- package/{src → speechflow-cli/src}/speechflow-node-t2t-openai.ts +1 -1
- package/{src → speechflow-cli/src}/speechflow-node-t2t-sentence.ts +37 -20
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +276 -0
- package/{src → speechflow-cli/src}/speechflow-node-t2t-transformers.ts +4 -3
- package/{src → speechflow-cli/src}/speechflow-node-x2x-filter.ts +9 -5
- package/{src → speechflow-cli/src}/speechflow-node-x2x-trace.ts +16 -8
- package/{src → speechflow-cli/src}/speechflow-node-xio-device.ts +12 -8
- package/{src → speechflow-cli/src}/speechflow-node-xio-file.ts +9 -3
- package/{src → speechflow-cli/src}/speechflow-node-xio-mqtt.ts +5 -2
- package/{src → speechflow-cli/src}/speechflow-node-xio-websocket.ts +12 -12
- package/{src → speechflow-cli/src}/speechflow-node.ts +7 -0
- package/{src → speechflow-cli/src}/speechflow-utils.ts +78 -44
- package/{src → speechflow-cli/src}/speechflow.ts +188 -53
- package/speechflow-ui-db/etc/eslint.mjs +106 -0
- package/speechflow-ui-db/etc/htmllint.json +55 -0
- package/speechflow-ui-db/etc/stx.conf +79 -0
- package/speechflow-ui-db/etc/stylelint.js +46 -0
- package/speechflow-ui-db/etc/stylelint.yaml +33 -0
- package/speechflow-ui-db/etc/tsc-client.json +30 -0
- package/speechflow-ui-db/etc/tsc.node.json +9 -0
- package/speechflow-ui-db/etc/vite-client.mts +63 -0
- package/speechflow-ui-db/package.d/htmllint-cli+0.0.7.patch +20 -0
- package/speechflow-ui-db/package.json +75 -0
- package/speechflow-ui-db/src/app-icon.ai +1989 -4
- package/speechflow-ui-db/src/app-icon.svg +26 -0
- package/speechflow-ui-db/src/app.styl +64 -0
- package/speechflow-ui-db/src/app.vue +221 -0
- package/speechflow-ui-db/src/index.html +23 -0
- package/speechflow-ui-db/src/index.ts +26 -0
- package/{dst/speechflow.d.ts → speechflow-ui-db/src/lib.d.ts} +5 -3
- package/speechflow-ui-db/src/tsconfig.json +3 -0
- package/speechflow-ui-st/etc/eslint.mjs +106 -0
- package/speechflow-ui-st/etc/htmllint.json +55 -0
- package/speechflow-ui-st/etc/stx.conf +79 -0
- package/speechflow-ui-st/etc/stylelint.js +46 -0
- package/speechflow-ui-st/etc/stylelint.yaml +33 -0
- package/speechflow-ui-st/etc/tsc-client.json +30 -0
- package/speechflow-ui-st/etc/tsc.node.json +9 -0
- package/speechflow-ui-st/etc/vite-client.mts +63 -0
- package/speechflow-ui-st/package.d/htmllint-cli+0.0.7.patch +20 -0
- package/speechflow-ui-st/package.json +79 -0
- package/speechflow-ui-st/src/app-icon.ai +1989 -4
- package/speechflow-ui-st/src/app-icon.svg +26 -0
- package/speechflow-ui-st/src/app.styl +64 -0
- package/speechflow-ui-st/src/app.vue +142 -0
- package/speechflow-ui-st/src/index.html +23 -0
- package/speechflow-ui-st/src/index.ts +26 -0
- package/speechflow-ui-st/src/lib.d.ts +9 -0
- package/speechflow-ui-st/src/tsconfig.json +3 -0
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +0 -13
- package/dst/speechflow-node-a2a-ffmpeg.js +0 -153
- package/dst/speechflow-node-a2a-ffmpeg.js.map +0 -1
- package/dst/speechflow-node-a2a-gender.d.ts +0 -18
- package/dst/speechflow-node-a2a-gender.js +0 -271
- package/dst/speechflow-node-a2a-gender.js.map +0 -1
- package/dst/speechflow-node-a2a-meter.d.ts +0 -12
- package/dst/speechflow-node-a2a-meter.js +0 -155
- package/dst/speechflow-node-a2a-meter.js.map +0 -1
- package/dst/speechflow-node-a2a-mute.d.ts +0 -16
- package/dst/speechflow-node-a2a-mute.js +0 -91
- package/dst/speechflow-node-a2a-mute.js.map +0 -1
- package/dst/speechflow-node-a2a-vad.d.ts +0 -16
- package/dst/speechflow-node-a2a-vad.js +0 -285
- package/dst/speechflow-node-a2a-vad.js.map +0 -1
- package/dst/speechflow-node-a2a-wav.d.ts +0 -11
- package/dst/speechflow-node-a2a-wav.js +0 -195
- package/dst/speechflow-node-a2a-wav.js.map +0 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +0 -15
- package/dst/speechflow-node-a2t-deepgram.js +0 -255
- package/dst/speechflow-node-a2t-deepgram.js.map +0 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -16
- package/dst/speechflow-node-t2a-elevenlabs.js +0 -195
- package/dst/speechflow-node-t2a-elevenlabs.js.map +0 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +0 -13
- package/dst/speechflow-node-t2a-kokoro.js +0 -149
- package/dst/speechflow-node-t2a-kokoro.js.map +0 -1
- package/dst/speechflow-node-t2t-deepl.d.ts +0 -15
- package/dst/speechflow-node-t2t-deepl.js +0 -142
- package/dst/speechflow-node-t2t-deepl.js.map +0 -1
- package/dst/speechflow-node-t2t-format.d.ts +0 -11
- package/dst/speechflow-node-t2t-format.js +0 -82
- package/dst/speechflow-node-t2t-format.js.map +0 -1
- package/dst/speechflow-node-t2t-ollama.d.ts +0 -13
- package/dst/speechflow-node-t2t-ollama.js +0 -247
- package/dst/speechflow-node-t2t-ollama.js.map +0 -1
- package/dst/speechflow-node-t2t-openai.d.ts +0 -13
- package/dst/speechflow-node-t2t-openai.js +0 -227
- package/dst/speechflow-node-t2t-openai.js.map +0 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +0 -17
- package/dst/speechflow-node-t2t-sentence.js +0 -234
- package/dst/speechflow-node-t2t-sentence.js.map +0 -1
- package/dst/speechflow-node-t2t-subtitle.d.ts +0 -13
- package/dst/speechflow-node-t2t-subtitle.js +0 -278
- package/dst/speechflow-node-t2t-subtitle.js.map +0 -1
- package/dst/speechflow-node-t2t-transformers.d.ts +0 -14
- package/dst/speechflow-node-t2t-transformers.js +0 -265
- package/dst/speechflow-node-t2t-transformers.js.map +0 -1
- package/dst/speechflow-node-x2x-filter.d.ts +0 -11
- package/dst/speechflow-node-x2x-filter.js +0 -117
- package/dst/speechflow-node-x2x-filter.js.map +0 -1
- package/dst/speechflow-node-x2x-trace.d.ts +0 -11
- package/dst/speechflow-node-x2x-trace.js +0 -111
- package/dst/speechflow-node-x2x-trace.js.map +0 -1
- package/dst/speechflow-node-xio-device.d.ts +0 -13
- package/dst/speechflow-node-xio-device.js +0 -226
- package/dst/speechflow-node-xio-device.js.map +0 -1
- package/dst/speechflow-node-xio-file.d.ts +0 -11
- package/dst/speechflow-node-xio-file.js +0 -210
- package/dst/speechflow-node-xio-file.js.map +0 -1
- package/dst/speechflow-node-xio-mqtt.d.ts +0 -13
- package/dst/speechflow-node-xio-mqtt.js +0 -185
- package/dst/speechflow-node-xio-mqtt.js.map +0 -1
- package/dst/speechflow-node-xio-websocket.d.ts +0 -13
- package/dst/speechflow-node-xio-websocket.js +0 -278
- package/dst/speechflow-node-xio-websocket.js.map +0 -1
- package/dst/speechflow-node.d.ts +0 -65
- package/dst/speechflow-node.js +0 -180
- package/dst/speechflow-node.js.map +0 -1
- package/dst/speechflow-utils.d.ts +0 -69
- package/dst/speechflow-utils.js +0 -486
- package/dst/speechflow-utils.js.map +0 -1
- package/dst/speechflow.js +0 -768
- package/dst/speechflow.js.map +0 -1
- package/src/speechflow-node-a2a-meter.ts +0 -130
- package/src/speechflow-node-a2a-vad.ts +0 -285
- package/src/speechflow-node-a2t-deepgram.ts +0 -234
- package/src/speechflow-node-t2t-subtitle.ts +0 -149
- /package/{etc → speechflow-cli/etc}/biome.jsonc +0 -0
- /package/{etc → speechflow-cli/etc}/oxlint.jsonc +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.bat +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.sh +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.yaml +0 -0
- /package/{etc → speechflow-cli/etc}/tsconfig.json +0 -0
- /package/{package.d → speechflow-cli/package.d}/@ericedouard+vad-node-realtime+0.2.0.patch +0 -0
- /package/{src → speechflow-cli/src}/lib.d.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-logo.ai +0 -0
- /package/{src → speechflow-cli/src}/speechflow-logo.svg +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-a2a-ffmpeg.ts +0 -0
- /package/{tsconfig.json → speechflow-cli/tsconfig.json} +0 -0
package/speechflow-cli/src/speechflow-node-a2a-vad.ts (new file)

@@ -0,0 +1,384 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import { RealTimeVAD } from "@ericedouard/vad-node-realtime"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* audio stream queue element */
+type AudioQueueElementSegment = {
+    data: Float32Array,
+    isSpeech?: boolean
+}
+type AudioQueueElement = {
+    type: "audio-frame",
+    chunk: SpeechFlowChunk,
+    segmentIdx: number,
+    segmentData: AudioQueueElementSegment[],
+    isSpeech?: boolean
+} | {
+    type: "audio-eof"
+}
+
+/* SpeechFlow node for VAD speech-to-speech processing */
+export default class SpeechFlowNodeVAD extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "vad"
+
+    /* internal state */
+    private vad: RealTimeVAD | null = null
+    private queue = new utils.Queue<AudioQueueElement>()
+    private queueRecv = this.queue.pointerUse("recv")
+    private queueVAD = this.queue.pointerUse("vad")
+    private queueSend = this.queue.pointerUse("send")
+    private destroyed = false
+    private tailTimer: ReturnType<typeof setTimeout> | null = null
+    private activeEventListeners = new Set<() => void>()
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            mode: { type: "string", val: "silenced", match: /^(?:silenced|unplugged)$/ },
+            posSpeechThreshold: { type: "number", val: 0.50 },
+            negSpeechThreshold: { type: "number", val: 0.35 },
+            minSpeechFrames: { type: "number", val: 2 },
+            redemptionFrames: { type: "number", val: 12 },
+            preSpeechPadFrames: { type: "number", val: 1 },
+            postSpeechTail: { type: "number", val: 1500 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* sanity check situation */
+        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+            throw new Error("VAD node currently supports PCM-S16LE audio only")
+
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* internal processing constants */
+        const vadSampleRateTarget = 16000 /* internal target of VAD */
+        const vadSamplesPerFrame = 512 /* required for VAD v5 */
+
+        /* helper function for timer cleanup */
+        const clearTailTimer = () => {
+            if (this.tailTimer !== null) {
+                clearTimeout(this.tailTimer)
+                this.tailTimer = null
+            }
+        }
+
+        /* establish Voice Activity Detection (VAD) facility */
+        let tail = false
+        try {
+            this.vad = await RealTimeVAD.new({
+                model: "v5",
+                sampleRate: this.config.audioSampleRate, /* before resampling to 16KHz */
+                frameSamples: vadSamplesPerFrame, /* after resampling to 16KHz */
+                positiveSpeechThreshold: this.params.posSpeechThreshold,
+                negativeSpeechThreshold: this.params.negSpeechThreshold,
+                minSpeechFrames: this.params.minSpeechFrames,
+                redemptionFrames: this.params.redemptionFrames,
+                preSpeechPadFrames: this.params.preSpeechPadFrames,
+                onSpeechStart: () => {
+                    if (this.destroyed)
+                        return
+                    this.log("info", "VAD: speech start")
+                    if (this.params.mode === "unplugged") {
+                        tail = false
+                        clearTailTimer()
+                    }
+                },
+                onSpeechEnd: (audio) => {
+                    if (this.destroyed)
+                        return
+                    const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
+                    this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
+                    if (this.params.mode === "unplugged") {
+                        tail = true
+                        clearTailTimer()
+                        this.tailTimer = setTimeout(() => {
+                            if (this.destroyed || this.tailTimer === null)
+                                return
+                            tail = false
+                            this.tailTimer = null
+                        }, this.params.postSpeechTail)
+                    }
+                },
+                onVADMisfire: () => {
+                    if (this.destroyed)
+                        return
+                    this.log("info", "VAD: speech end (segment too short)")
+                    if (this.params.mode === "unplugged") {
+                        tail = true
+                        clearTailTimer()
+                        this.tailTimer = setTimeout(() => {
+                            if (this.destroyed || this.tailTimer === null)
+                                return
+                            tail = false
+                            this.tailTimer = null
+                        }, this.params.postSpeechTail)
+                    }
+                },
+                onFrameProcessed: (audio) => {
+                    if (this.destroyed)
+                        return
+                    try {
+                        /* annotate the current audio segment */
+                        const element = this.queueVAD.peek()
+                        if (element === undefined || element.type !== "audio-frame")
+                            throw new Error("internal error which cannot happen: no more queued element")
+                        if (element.segmentIdx >= element.segmentData.length)
+                            throw new Error("segment index out of bounds")
+                        const segment = element.segmentData[element.segmentIdx++]
+                        segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
+
+                        /* annotate the entire audio chunk */
+                        if (element.segmentIdx >= element.segmentData.length) {
+                            element.isSpeech = element.segmentData.some(segment => segment.isSpeech)
+                            this.queueVAD.touch()
+                            this.queueVAD.walk(+1)
+                        }
+                    }
+                    catch (error) {
+                        this.log("error", `VAD frame processing error: ${error}`)
+                    }
+                }
+            })
+            this.vad.start()
+        }
+        catch (error) {
+            throw new Error(`failed to initialize VAD: ${error}`)
+        }
+
+        /* provide Duplex stream and internally attach to VAD */
+        const self = this
+        this.stream = new Stream.Duplex({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+
+            /* receive audio chunk (writable side of stream) */
+            write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("expected audio input as Buffer chunks"))
+                else if (chunk.payload.byteLength === 0)
+                    callback()
+                else {
+                    try {
+                        /* convert audio samples from PCM/I16 to PCM/F32 */
+                        const data = utils.convertBufToF32(chunk.payload,
+                            self.config.audioLittleEndian)
+
+                        /* segment audio samples as individual VAD-sized frames */
+                        const segmentData: AudioQueueElementSegment[] = []
+                        const chunkSize = vadSamplesPerFrame *
+                            (self.config.audioSampleRate / vadSampleRateTarget)
+                        const chunks = Math.trunc(data.length / chunkSize)
+                        for (let i = 0; i < chunks; i++) {
+                            const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
+                            const segment: AudioQueueElementSegment = { data: frame }
+                            segmentData.push(segment)
+                        }
+                        if ((chunks * chunkSize) < data.length) {
+                            const frame = new Float32Array(chunkSize)
+                            frame.fill(0)
+                            frame.set(data.slice(chunks * chunkSize, data.length))
+                            const segment: AudioQueueElementSegment = { data: frame }
+                            segmentData.push(segment)
+                        }
+
+                        /* queue the results */
+                        self.queueRecv.append({
+                            type: "audio-frame", chunk,
+                            segmentIdx: 0, segmentData
+                        })
+
+                        /* push segments through Voice Activity Detection (VAD) */
+                        if (self.vad && !self.destroyed) {
+                            try {
+                                for (const segment of segmentData)
+                                    self.vad.processAudio(segment.data)
+                            }
+                            catch (error) {
+                                self.log("error", `VAD processAudio error: ${error}`)
+                            }
+                        }
+
+                        callback()
+                    }
+                    catch (error) {
+                        callback(error instanceof Error ? error : new Error("VAD processing failed"))
+                    }
+                }
+            },
+
+            /* receive no more audio chunks (writable side of stream) */
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+
+                /* signal end of file */
+                self.queueRecv.append({ type: "audio-eof" })
+                callback()
+            },
+
+            /* send audio chunk(s) (readable side of stream) */
+            read (_size) {
+                if (self.destroyed) {
+                    this.push(null)
+                    return
+                }
+
+                /* try to perform read operation from scratch */
+                const tryToRead = () => {
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+
+                    /* flush pending audio chunks */
+                    const flushPendingChunks = () => {
+                        let pushed = 0
+                        while (true) {
+                            if (self.destroyed) {
+                                this.push(null)
+                                return
+                            }
+                            const element = self.queueSend.peek()
+                            if (element === undefined)
+                                break
+                            else if (element.type === "audio-eof") {
+                                this.push(null)
+                                break
+                            }
+                            else if (element.type === "audio-frame"
+                                && element.isSpeech === undefined)
+                                break
+                            self.queueSend.walk(+1)
+                            self.queue.trim()
+                            if (element.isSpeech) {
+                                this.push(element.chunk)
+                                pushed++
+                            }
+                            else if (self.params.mode === "silenced") {
+                                const chunk = element.chunk.clone()
+                                const buffer = chunk.payload as Buffer
+                                buffer.fill(0)
+                                this.push(chunk)
+                                pushed++
+                            }
+                            else if (self.params.mode === "unplugged" && pushed === 0) {
+                                /* we have to await chunks now, as in unplugged
+                                   mode we would otherwise never be called again
+                                   until we push at least one new chunk as the result */
+                                setTimeout(() => {
+                                    if (self.destroyed)
+                                        return
+                                    tryToRead()
+                                }, 0)
+                                return
+                            }
+                        }
+                    }
+
+                    /* await forthcoming audio chunks */
+                    const awaitForthcomingChunks = () => {
+                        if (self.destroyed)
+                            return
+                        const element = self.queueSend.peek()
+                        if (element !== undefined
+                            && element.type === "audio-frame"
+                            && element.isSpeech !== undefined)
+                            flushPendingChunks()
+                        else if (!self.destroyed) {
+                            self.queue.once("write", awaitForthcomingChunks)
+                            self.activeEventListeners.add(awaitForthcomingChunks)
+                        }
+                    }
+
+                    const element = self.queueSend.peek()
+                    if (element !== undefined && element.type === "audio-eof")
+                        this.push(null)
+                    else if (element !== undefined
+                        && element.type === "audio-frame"
+                        && element.isSpeech !== undefined)
+                        flushPendingChunks()
+                    else if (!self.destroyed) {
+                        self.queue.once("write", awaitForthcomingChunks)
+                        self.activeEventListeners.add(awaitForthcomingChunks)
+                    }
+                }
+                tryToRead()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* cleanup tail timer */
+        if (this.tailTimer !== null) {
+            clearTimeout(this.tailTimer)
+            this.tailTimer = null
+        }
+
+        /* remove all event listeners */
+        this.activeEventListeners.forEach((listener) => {
+            this.queue.removeListener("write", listener)
+        })
+        this.activeEventListeners.clear()
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* cleanup queue pointers before closing VAD to prevent callback access */
+        this.queue.pointerDelete("recv")
+        this.queue.pointerDelete("vad")
+        this.queue.pointerDelete("send")
+
+        /* close VAD */
+        if (this.vad !== null) {
+            try {
+                const flushPromise = this.vad.flush()
+                const timeoutPromise = new Promise((resolve) =>
+                    setTimeout(resolve, 5000))
+                await Promise.race([ flushPromise, timeoutPromise ])
+            }
+            catch (error) {
+                this.log("warning", `VAD flush error during close: ${error}`)
+            }
+            this.vad.destroy()
+            this.vad = null
+        }
+    }
+}
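The new VAD node decouples its three concerns through one shared queue with three named pointers: `recv` appends incoming frames, `vad` walks behind it annotating each frame with a speech flag from the asynchronous VAD callbacks, and `send` drains fully annotated frames to the readable side (at a 48 kHz input, each VAD frame covers 512 × 48000/16000 = 1536 samples). The following is a minimal sketch of the multi-pointer queue contract this code assumes of `utils.Queue`; the names and all implementation details here are illustrative, not the actual speechflow-utils.ts implementation:

/* Illustrative sketch only: the multi-pointer queue semantics assumed
   by the VAD node above. The real utils.Queue in speechflow-utils.ts
   may differ in detail. */
import { EventEmitter } from "node:events"

class QueuePointer<T> {
    constructor (private queue: MultiPointerQueue<T>, public pos = 0) {}
    peek () { return this.queue.at(this.pos) }          /* read without consuming */
    walk (delta: number) { this.pos += delta }          /* move this cursor only */
    touch () { this.queue.emit("write") }               /* signal an in-place mutation */
    append (element: T) { this.queue.push(element) }    /* enqueue at the tail */
}

class MultiPointerQueue<T> extends EventEmitter {
    private elements: T[] = []
    private pointers = new Map<string, QueuePointer<T>>()
    pointerUse (name: string) {
        let pointer = this.pointers.get(name)
        if (pointer === undefined) {
            pointer = new QueuePointer(this)
            this.pointers.set(name, pointer)
        }
        return pointer
    }
    pointerDelete (name: string) { this.pointers.delete(name) }
    at (pos: number): T | undefined { return this.elements[pos] }
    push (element: T) {
        this.elements.push(element)
        this.emit("write")  /* wakes readers registered via queue.once("write", ...) */
    }
    trim () {
        /* drop elements every pointer has already walked past */
        const min = Math.min(...[ ...this.pointers.values() ].map((p) => p.pos))
        if (Number.isFinite(min) && min > 0) {
            this.elements.splice(0, min)
            for (const pointer of this.pointers.values())
                pointer.pos -= min
        }
    }
}

Under such a contract, `queue.trim()` keeps memory bounded to the window between the fastest and slowest pointer, which is why the read side calls it right after walking `send` forward.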
package/{src → speechflow-cli/src}/speechflow-node-a2a-wav.ts

@@ -103,8 +103,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode
             decodeStrings: false,
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
-                if (!Buffer.isBuffer(chunk.payload))
+                if (!Buffer.isBuffer(chunk.payload)) {
                     callback(new Error("invalid chunk payload type"))
+                    return
+                }
                 else if (firstChunk) {
                     if (self.params.mode === "encode") {
                         /* convert raw/PCM to WAV/PCM

@@ -127,6 +129,10 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode
                     }
                     else if (self.params.mode === "decode") {
                         /* convert WAV/PCM to raw/PCM */
+                        if (chunk.payload.length < 44) {
+                            callback(new Error("WAV header too short, expected at least 44 bytes"))
+                            return
+                        }
                         const header = readWavHeader(chunk.payload)
                         self.log("info", "WAV audio stream: " +
                             `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :

@@ -134,20 +140,30 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode
                             `channels=${header.channels} ` +
                             `sampleRate=${header.sampleRate} ` +
                             `bitDepth=${header.bitDepth}`)
-                        if (header.audioFormat !== 0x0001 /* PCM */)
-                            callback(new Error("WAV not based on PCM format"))
-                        if (header.bitDepth !== self.config.audioBitDepth)
-                            callback(new Error(`WAV not based on ${self.config.audioBitDepth} bit samples`))
-                        if (header.sampleRate !== self.config.audioSampleRate)
-                            callback(new Error(`WAV not based on ${self.config.audioSampleRate}Hz sample rate`))
-                        if (header.channels !== self.config.audioChannels)
-                            callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
+                        if (header.audioFormat !== 0x0001 /* PCM */) {
+                            callback(new Error("WAV not based on PCM format"))
+                            return
+                        }
+                        if (header.bitDepth !== self.config.audioBitDepth) {
+                            callback(new Error(`WAV not based on ${self.config.audioBitDepth} bit samples`))
+                            return
+                        }
+                        if (header.sampleRate !== self.config.audioSampleRate) {
+                            callback(new Error(`WAV not based on ${self.config.audioSampleRate}Hz sample rate`))
+                            return
+                        }
+                        if (header.channels !== self.config.audioChannels) {
+                            callback(new Error(`WAV not based on ${self.config.audioChannels} channel(s)`))
+                            return
+                        }
                         chunk.payload = chunk.payload.subarray(44)
                         this.push(chunk)
                         callback()
                     }
-                    else
-                        callback(new Error(`invalid operation mode "${self.params.mode}"`))
+                    else {
+                        callback(new Error(`invalid operation mode "${self.params.mode}"`))
+                        return
+                    }
                 }
                 else {
                     /* pass-through original chunk */