speechflow 1.6.5 → 1.6.6
This diff reflects the changes between publicly released package versions as they appear in their respective public registries and is provided for informational purposes only.
- package/CHANGELOG.md +12 -0
- package/README.md +23 -0
- package/etc/stx.conf +5 -0
- package/package.json +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +12 -11
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +12 -11
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -8
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +18 -16
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +38 -34
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +11 -11
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +44 -10
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +213 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js +149 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js +202 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +12 -11
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +13 -12
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +24 -23
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +35 -7
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +16 -16
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +16 -16
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +15 -15
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +9 -9
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +13 -12
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +4 -4
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +36 -2
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +13 -13
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -2
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +42 -8
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -2
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +19 -18
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +13 -13
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +8 -8
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.js +6 -6
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-stream.js +22 -2
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/etc/tsconfig.json +1 -0
- package/speechflow-cli/package.json +14 -14
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +13 -12
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +13 -12
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +2 -8
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +19 -17
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +8 -8
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +42 -36
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +11 -11
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +11 -10
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +221 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +13 -12
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +14 -13
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +24 -23
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +2 -7
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +16 -16
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +16 -16
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +15 -15
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +9 -9
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +13 -12
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +4 -4
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +3 -2
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +6 -6
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +13 -13
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +2 -2
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +10 -9
- package/speechflow-cli/src/speechflow-node-xio-device.ts +4 -3
- package/speechflow-cli/src/speechflow-node-xio-file.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +14 -14
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +10 -10
- package/speechflow-cli/src/speechflow-node.ts +6 -6
- package/speechflow-cli/src/speechflow-util-audio.ts +1 -1
- package/speechflow-cli/src/speechflow-util-stream.ts +30 -5
- package/speechflow-ui-db/dst/index.js +20 -20
- package/speechflow-ui-db/package.json +7 -7
- package/speechflow-ui-st/dst/index.js +40 -40
- package/speechflow-ui-st/package.json +8 -8
package/speechflow-cli/src/speechflow-node-a2a-pitch.ts (new file):

@@ -0,0 +1,221 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import path from "node:path"
+import Stream from "node:stream"
+
+/* external dependencies */
+import { AudioWorkletNode } from "node-web-audio-api"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as util from "./speechflow-util"
+
+/* parameter configuration */
+type AudioPitchShifterConfig = {
+    rate?: number
+    tempo?: number
+    pitch?: number
+    semitones?: number
+}
+
+/* audio pitch shifter class using SoundTouch WebAudio worklet */
+class AudioPitchShifter extends util.WebAudio {
+    /* internal state */
+    private pitchNode: AudioWorkletNode | null = null
+    private config: Required<AudioPitchShifterConfig>
+
+    /* construct object */
+    constructor (
+        sampleRate: number,
+        channels: number,
+        config: AudioPitchShifterConfig = {}
+    ) {
+        super(sampleRate, channels)
+        this.config = {
+            rate: config.rate ?? 1.0,
+            tempo: config.tempo ?? 1.0,
+            pitch: config.pitch ?? 1.0,
+            semitones: config.semitones ?? 0.0
+        }
+    }
+
+    /* setup object */
+    public async setup (): Promise<void> {
+        await super.setup()
+
+        /* add SoundTouch worklet module */
+        const packagePath = path.join(__dirname, "../node_modules/@soundtouchjs/audio-worklet")
+        const workletPath = path.join(packagePath, "dist/soundtouch-worklet.js")
+        await this.audioContext.audioWorklet.addModule(workletPath)
+
+        /* create SoundTouch worklet node */
+        this.pitchNode = new AudioWorkletNode(this.audioContext, "soundtouch-processor", {
+            numberOfInputs: 1,
+            numberOfOutputs: 1,
+            outputChannelCount: [ this.channels ]
+        })
+
+        /* set initial parameter values */
+        const params = this.pitchNode.parameters as Map<string, AudioParam>
+        params.get("rate")!.value = this.config.rate
+        params.get("tempo")!.value = this.config.tempo
+        params.get("pitch")!.value = this.config.pitch
+        params.get("pitchSemitones")!.value = this.config.semitones
+
+        /* connect nodes: source -> pitch -> capture */
+        this.sourceNode!.connect(this.pitchNode)
+        this.pitchNode.connect(this.captureNode!)
+    }
+
+    /* update an audio parameter value */
+    private updateParameter (
+        paramName: string,
+        value: number,
+        configField: keyof Required<AudioPitchShifterConfig>
+    ): void {
+        const params = this.pitchNode?.parameters as Map<string, AudioParam>
+        params?.get(paramName)?.setValueAtTime(value, this.audioContext.currentTime)
+        this.config[configField] = value
+    }
+
+    /* update rate value */
+    public setRate (rate: number): void {
+        this.updateParameter("rate", rate, "rate")
+    }
+
+    /* update tempo value */
+    public setTempo (tempo: number): void {
+        this.updateParameter("tempo", tempo, "tempo")
+    }
+
+    /* update pitch shift value */
+    public setPitch (pitch: number): void {
+        this.updateParameter("pitch", pitch, "pitch")
+    }
+
+    /* update pitch semitones setting */
+    public setSemitones (semitones: number): void {
+        this.updateParameter("pitchSemitones", semitones, "semitones")
+    }
+
+    /* destroy the pitch shifter */
+    public async destroy (): Promise<void> {
+        /* disconnect pitch node */
+        if (this.pitchNode !== null) {
+            this.pitchNode.disconnect()
+            this.pitchNode = null
+        }
+
+        /* destroy parent */
+        await super.destroy()
+    }
+}
+
+/* SpeechFlow node for pitch adjustment using SoundTouch WebAudio */
+export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "a2a-pitch"
+
+    /* internal state */
+    private closing = false
+    private pitchShifter: AudioPitchShifter | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            rate: { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
+            tempo: { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
+            pitch: { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
+            semitones: { type: "number", val: 0.0, match: (n: number) => n >= -24 && n <= 24 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.closing = false
+
+        /* setup pitch shifter */
+        this.pitchShifter = new AudioPitchShifter(
+            this.config.audioSampleRate,
+            this.config.audioChannels, {
+                rate: this.params.rate,
+                tempo: this.params.tempo,
+                pitch: this.params.pitch,
+                semitones: this.params.semitones
+            }
+        )
+        await this.pitchShifter.setup()
+
+        /* establish a transform stream */
+        const self = this
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+                if (self.closing) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    /* shift pitch of audio chunk */
+                    const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
+                    self.pitchShifter?.process(payload).then((result) => {
+                        if (self.closing)
+                            throw new Error("stream already destroyed")
+
+                        /* take over pitch-shifted data */
+                        const payload = util.convertI16ToBuf(result, self.config.audioLittleEndian)
+                        chunk.payload = payload
+                        this.push(chunk)
+                        callback()
+                    }).catch((error: unknown) => {
+                        if (!self.closing)
+                            callback(util.ensureError(error, "pitch shifting failed"))
+                    })
+                }
+            },
+            final (callback) {
+                if (self.closing) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate closing */
+        this.closing = true
+
+        /* destroy pitch shifter */
+        if (this.pitchShifter !== null) {
+            await this.pitchShifter.destroy()
+            this.pitchShifter = null
+        }
+
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await util.destroyStream(this.stream)
+            this.stream = null
+        }
+    }
+}
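For orientation, the calls the new `a2a-pitch` node drives on its internal `AudioPitchShifter` can be summarized as below. This is only a sketch restating the API visible in the hunk above (constructor, `setup()`, the `set*()` updaters, `destroy()`); the class is module-internal, and `process()` is inherited from the `util.WebAudio` base class, which is not part of this diff, so its exact signature is assumed.

```ts
/* sketch only -- mirrors what SpeechFlowNodeA2APitch does in open()/transform()/close();
   AudioPitchShifter is module-internal, and process() comes from util.WebAudio (assumed) */
async function demoPitchShift (input: Int16Array): Promise<Int16Array> {
    const shifter = new AudioPitchShifter(48000, 2, { semitones: 3 }) /* 48 kHz, stereo, +3 semitones */
    await shifter.setup()                       /* load SoundTouch worklet, wire source -> pitch -> capture */
    const output = await shifter.process(input) /* pitch-shifted PCM samples back as Int16Array (assumed) */
    shifter.setSemitones(-2)                    /* parameters can be retuned live via setValueAtTime() */
    await shifter.destroy()                     /* disconnect worklet node, tear down WebAudio graph */
    return output
}
```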
package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts:

@@ -19,7 +19,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
     public static name = "a2a-rnnoise"
 
     /* internal state */
-    private
+    private closing = false
     private sampleSize = 480 /* = 10ms at 48KHz, as required by RNNoise! */
     private worker: Worker | null = null
 
@@ -38,7 +38,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
     /* open node */
     async open () {
         /* clear destruction flag */
-        this.
+        this.closing = false
 
         /* initialize worker */
         this.worker = new Worker(resolve(__dirname, "speechflow-node-a2a-rnnoise-wt.js"))
@@ -89,7 +89,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
         /* send message to worker */
         let seq = 0
         const workerProcessSegment = async (segment: Int16Array<ArrayBuffer>) => {
-            if (this.
+            if (this.closing)
                 return segment
             const id = `${seq++}`
             return new Promise<Int16Array<ArrayBuffer>>((resolve) => {
@@ -105,7 +105,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
             writableObjectMode: true,
             decodeStrings: false,
             transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
-                if (self.
+                if (self.closing) {
                     callback(new Error("stream already destroyed"))
                     return
                 }
@@ -128,14 +128,15 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
                         /* forward updated chunk */
                         this.push(chunk)
                         callback()
-                    }).catch((err:
-
-
+                    }).catch((err: unknown) => {
+                        const error = util.ensureError(err)
+                        self.log("warning", `processing of chunk failed: ${error.message}`)
+                        callback(error)
                     })
                 }
             },
             final (callback) {
-                if (self.
+                if (self.closing) {
                     callback()
                     return
                 }
@@ -147,8 +148,8 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
 
     /* close node */
     async close () {
-        /* indicate
-        this.
+        /* indicate closing */
+        this.closing = true
 
         /* shutdown worker */
         if (this.worker !== null) {
@@ -156,9 +157,9 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
             this.worker = null
         }
 
-        /*
+        /* shutdown stream */
         if (this.stream !== null) {
-            this.stream
+            await util.destroyStream(this.stream)
             this.stream = null
         }
     }
package/speechflow-cli/src/speechflow-node-a2a-speex.ts:

@@ -22,7 +22,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
     public static name = "a2a-speex"
 
     /* internal state */
-    private
+    private closing = false
     private sampleSize = 480 /* = 10ms at 48KHz */
     private speexProcessor: SpeexPreprocessor | null = null
 
@@ -43,7 +43,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
     /* open node */
     async open () {
         /* clear destruction flag */
-        this.
+        this.closing = false
 
         /* validate sample rate compatibility */
        if (this.config.audioSampleRate !== 48000)
@@ -71,7 +71,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
             writableObjectMode: true,
             decodeStrings: false,
             transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
-                if (self.
+                if (self.closing) {
                     callback(new Error("stream already destroyed"))
                     return
                 }
@@ -83,12 +83,12 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
 
                     /* process Int16Array in necessary fixed-size segments */
                     util.processInt16ArrayInSegments(payload, self.sampleSize, (segment) => {
-                        if (self.
+                        if (self.closing)
                             throw new Error("stream already destroyed")
                         self.speexProcessor?.processInt16(segment)
                         return Promise.resolve(segment)
                     }).then((payload: Int16Array<ArrayBuffer>) => {
-                        if (self.
+                        if (self.closing)
                             throw new Error("stream already destroyed")
 
                         /* convert Int16Array back into Buffer */
@@ -100,14 +100,15 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
                         /* forward updated chunk */
                         this.push(chunk)
                         callback()
-                    }).catch((err:
-
-
+                    }).catch((err: unknown) => {
+                        const error = util.ensureError(err)
+                        self.log("warning", `processing of chunk failed: ${error.message}`)
+                        callback(error)
                     })
                 }
             },
             final (callback) {
-                if (self.
+                if (self.closing) {
                     callback()
                     return
                 }
@@ -119,8 +120,8 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
 
     /* close node */
     async close () {
-        /* indicate
-        this.
+        /* indicate closing */
+        this.closing = true
 
         /* destroy processor */
         if (this.speexProcessor !== null) {
@@ -128,9 +129,9 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
             this.speexProcessor = null
         }
 
-        /*
+        /* shutdown stream */
         if (this.stream !== null) {
-            this.stream
+            await util.destroyStream(this.stream)
             this.stream = null
         }
     }
package/speechflow-cli/src/speechflow-node-a2a-vad.ts:

@@ -40,7 +40,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
     private queueRecv = this.queue.pointerUse("recv")
     private queueVAD = this.queue.pointerUse("vad")
     private queueSend = this.queue.pointerUse("send")
-    private
+    private closing = false
     private tailTimer: ReturnType<typeof setTimeout> | null = null
     private activeEventListeners = new Set<() => void>()
 
@@ -71,7 +71,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
             throw new Error("VAD node currently supports PCM-S16LE audio only")
 
         /* clear destruction flag */
-        this.
+        this.closing = false
 
         /* internal processing constants */
         const vadSampleRateTarget = 16000 /* internal target of VAD */
@@ -98,7 +98,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
             redemptionFrames: this.params.redemptionFrames,
             preSpeechPadFrames: this.params.preSpeechPadFrames,
             onSpeechStart: () => {
-                if (this.
+                if (this.closing)
                     return
                 this.log("info", "VAD: speech start")
                 if (this.params.mode === "unplugged") {
@@ -107,7 +107,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                 }
             },
             onSpeechEnd: (audio) => {
-                if (this.
+                if (this.closing)
                     return
                 const duration = util.audioArrayDuration(audio, vadSampleRateTarget)
                 this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
@@ -115,7 +115,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                     tail = true
                     clearTailTimer()
                     this.tailTimer = setTimeout(() => {
-                        if (this.
+                        if (this.closing || this.tailTimer === null)
                             return
                         tail = false
                         this.tailTimer = null
@@ -123,14 +123,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                 }
             },
             onVADMisfire: () => {
-                if (this.
+                if (this.closing)
                     return
                 this.log("info", "VAD: speech end (segment too short)")
                 if (this.params.mode === "unplugged") {
                     tail = true
                     clearTailTimer()
                     this.tailTimer = setTimeout(() => {
-                        if (this.
+                        if (this.closing || this.tailTimer === null)
                             return
                         tail = false
                         this.tailTimer = null
@@ -138,7 +138,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                 }
             },
             onFrameProcessed: (audio) => {
-                if (this.
+                if (this.closing)
                     return
                 try {
                     /* annotate the current audio segment */
@@ -178,7 +178,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
 
             /* receive audio chunk (writable side of stream) */
             write (chunk: SpeechFlowChunk, encoding, callback) {
-                if (self.
+                if (self.closing) {
                     callback(new Error("stream already destroyed"))
                     return
                 }
@@ -217,7 +217,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                 })
 
                 /* push segments through Voice Activity Detection (VAD) */
-                if (self.vad && !self.
+                if (self.vad && !self.closing) {
                     try {
                         for (const segment of segmentData)
                             self.vad.processAudio(segment.data)
@@ -230,14 +230,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         callback()
                     }
                     catch (error) {
-                        callback(error
+                        callback(util.ensureError(error, "VAD processing failed"))
                     }
                 }
             },
 
             /* receive no more audio chunks (writable side of stream) */
             final (callback) {
-                if (self.
+                if (self.closing) {
                     callback()
                     return
                 }
@@ -249,14 +249,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
 
             /* send audio chunk(s) (readable side of stream) */
             read (_size) {
-                if (self.
+                if (self.closing) {
                     this.push(null)
                     return
                 }
 
                 /* try to perform read operation from scratch */
                 const tryToRead = () => {
-                    if (self.
+                    if (self.closing) {
                         this.push(null)
                         return
                     }
@@ -265,7 +265,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                     const flushPendingChunks = () => {
                         let pushed = 0
                         while (true) {
-                            if (self.
+                            if (self.closing) {
                                 this.push(null)
                                 return
                             }
@@ -297,7 +297,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                            mode we else would be never called again until
                            we at least once push a new chunk as the result */
                         setTimeout(() => {
-                            if (self.
+                            if (self.closing || self.queue === null)
                                 return
                             tryToRead()
                         }, 0)
@@ -308,14 +308,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
 
                     /* await forthcoming audio chunks */
                     const awaitForthcomingChunks = () => {
-
+                        self.activeEventListeners.delete(awaitForthcomingChunks)
+                        if (self.closing)
                             return
                         const element = self.queueSend.peek()
                         if (element !== undefined
                             && element.type === "audio-frame"
                             && element.isSpeech !== undefined)
                             flushPendingChunks()
-                        else if (!self.
+                        else if (!self.closing && !self.activeEventListeners.has(awaitForthcomingChunks)) {
                             self.queue.once("write", awaitForthcomingChunks)
                             self.activeEventListeners.add(awaitForthcomingChunks)
                         }
@@ -328,7 +329,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         && element.type === "audio-frame"
                         && element.isSpeech !== undefined)
                         flushPendingChunks()
-                    else if (!self.
+                    else if (!self.closing && !self.activeEventListeners.has(awaitForthcomingChunks)) {
                         self.queue.once("write", awaitForthcomingChunks)
                         self.activeEventListeners.add(awaitForthcomingChunks)
                     }
@@ -340,8 +341,8 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
 
     /* close node */
     async close () {
-        /* indicate
-        this.
+        /* indicate closing */
+        this.closing = true
 
         /* cleanup tail timer */
         if (this.tailTimer !== null) {
@@ -355,9 +356,9 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
         })
         this.activeEventListeners.clear()
 
-        /*
+        /* shutdown stream */
         if (this.stream !== null) {
-            this.stream
+            await util.destroyStream(this.stream)
             this.stream = null
         }
 
package/speechflow-cli/src/speechflow-node-a2a-wav.ts:

@@ -9,6 +9,7 @@ import Stream from "node:stream"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as util from "./speechflow-util"
 
 /* write WAV header */
 const writeWavHeader = (
@@ -190,13 +191,7 @@
     async close () {
         /* shutdown stream */
         if (this.stream !== null) {
-            await
-                if (this.stream instanceof Stream.Duplex)
-                    this.stream.end(() => { resolve() })
-                else
-                    resolve()
-            })
-            this.stream.destroy()
+            await util.destroyStream(this.stream)
             this.stream = null
         }
     }
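The `await util.destroyStream(this.stream)` call that now appears in the `close()` methods above replaces per-node teardown code like the lines removed from the WAV node. The helper itself is added to `speechflow-util-stream.ts` in this release, but its body is not part of the hunks shown here; the following is merely a plausible sketch, reconstructed from the removed WAV code, of what a helper with that shape could look like.

```ts
/* hypothetical sketch -- not the shipped implementation of util.destroyStream() */
import Stream from "node:stream"

export async function destroyStream (stream: Stream.Readable | Stream.Writable): Promise<void> {
    /* first let the writable side end gracefully (as the removed WAV code did) */
    await new Promise<void>((resolve) => {
        if (stream instanceof Stream.Duplex || stream instanceof Stream.Writable)
            stream.end(() => { resolve() })
        else
            resolve()
    })

    /* then forcibly destroy the stream */
    stream.destroy()
}
```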