speechflow 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +4 -4
- package/package.json +2 -2
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +1 -0
- package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +2 -4
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-nodes.js +1 -0
- package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +7 -9
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +11 -9
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +19 -22
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +7 -0
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +2 -11
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +0 -6
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +27 -7
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -3
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +1 -4
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +27 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +1 -4
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +15 -4
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +0 -2
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +18 -16
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +5 -2
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +3 -5
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +2 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +9 -9
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +4 -0
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.js +2 -1
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/package.json +10 -10
- package/speechflow-cli/src/speechflow-main-api.ts +16 -16
- package/speechflow-cli/src/speechflow-main-cli.ts +1 -0
- package/speechflow-cli/src/speechflow-main-graph.ts +7 -9
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -10
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +9 -10
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +11 -9
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +5 -3
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +20 -23
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +7 -0
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -18
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +4 -11
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +32 -10
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -4
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +1 -4
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +33 -10
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +1 -4
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +15 -6
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -3
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +19 -18
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +5 -2
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +1 -1
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -0
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +1 -0
- package/speechflow-cli/src/speechflow-node-xio-file.ts +3 -5
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +5 -5
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +2 -0
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
- package/speechflow-cli/src/speechflow-util-audio.ts +5 -0
- package/speechflow-cli/src/speechflow-util-queue.ts +3 -3
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/package.json +4 -4
- package/speechflow-ui-st/package.json +4 -4
|
@@ -24,15 +24,15 @@ import * as util from "./speechflow-util"
|
|
|
24
24
|
/* the SpeechFlow node graph management */
|
|
25
25
|
export class NodeGraph {
|
|
26
26
|
/* internal state */
|
|
27
|
-
private graphNodes
|
|
28
|
-
private activeNodes
|
|
29
|
-
private finishEvents
|
|
27
|
+
private graphNodes = new Set<SpeechFlowNode>()
|
|
28
|
+
private activeNodes = new Set<SpeechFlowNode>()
|
|
29
|
+
private finishEvents = new EventEmitter()
|
|
30
30
|
private timeZero: DateTime | null = null
|
|
31
|
-
private shuttingDown
|
|
31
|
+
private shuttingDown = false
|
|
32
32
|
|
|
33
33
|
/* simple construction */
|
|
34
34
|
constructor (
|
|
35
|
-
private cli:
|
|
35
|
+
private cli: CLIio,
|
|
36
36
|
private debug = false
|
|
37
37
|
) {}
|
|
38
38
|
|
|
@@ -59,6 +59,7 @@ export class NodeGraph {
|
|
|
59
59
|
err instanceof Error && err.name === "FlowLinkError"
|
|
60
60
|
? err.toString() : (err instanceof Error ? err.message : "internal error")
|
|
61
61
|
|
|
62
|
+
/* instantiate FlowLink parser */
|
|
62
63
|
const flowlink = new FlowLink<SpeechFlowNode>({
|
|
63
64
|
trace: (msg: string) => {
|
|
64
65
|
this.cli.log("debug", msg)
|
|
@@ -97,10 +98,7 @@ export class NodeGraph {
|
|
|
97
98
|
}
|
|
98
99
|
catch (err) {
|
|
99
100
|
/* fatal error */
|
|
100
|
-
|
|
101
|
-
this.cli.log("error", `creation of node <${id}> failed: ${err.message}`)
|
|
102
|
-
else
|
|
103
|
-
this.cli.log("error", `creation of node <${id}> failed: ${err}`)
|
|
101
|
+
this.cli.log("error", `creation of node <${id}> failed: ${util.ensureError(err).message}`)
|
|
104
102
|
process.exit(1)
|
|
105
103
|
}
|
|
106
104
|
const params = Object.keys(node.params).map((key) => {
|
|
@@ -36,7 +36,7 @@ class AudioCompressor extends util.WebAudio {
|
|
|
36
36
|
private gainNode: GainNode | null = null
|
|
37
37
|
|
|
38
38
|
/* construct object */
|
|
39
|
-
constructor(
|
|
39
|
+
constructor (
|
|
40
40
|
sampleRate: number,
|
|
41
41
|
channels: number,
|
|
42
42
|
type: "standalone" | "sidechain" = "standalone",
|
|
@@ -106,8 +106,7 @@ class AudioCompressor extends util.WebAudio {
|
|
|
106
106
|
/* configure compressor worklet node */
|
|
107
107
|
const currentTime = this.audioContext.currentTime
|
|
108
108
|
if (needsCompressor) {
|
|
109
|
-
const
|
|
110
|
-
const params = node.parameters as Map<string, AudioParam>
|
|
109
|
+
const params = this.compressorNode!.parameters as Map<string, AudioParam>
|
|
111
110
|
params.get("threshold")!.setValueAtTime(this.config.thresholdDb, currentTime)
|
|
112
111
|
params.get("ratio")!.setValueAtTime(this.config.ratio, currentTime)
|
|
113
112
|
params.get("attack")!.setValueAtTime(this.config.attackMs / 1000, currentTime)
|
|
@@ -241,10 +240,12 @@ export default class SpeechFlowNodeA2ACompressor extends SpeechFlowNode {
|
|
|
241
240
|
}
|
|
242
241
|
if (!Buffer.isBuffer(chunk.payload))
|
|
243
242
|
callback(new Error("invalid chunk payload type"))
|
|
243
|
+
else if (self.compressor === null)
|
|
244
|
+
callback(new Error("compressor not initialized"))
|
|
244
245
|
else {
|
|
245
246
|
/* compress chunk */
|
|
246
247
|
const payload = util.convertBufToI16(chunk.payload)
|
|
247
|
-
self.compressor
|
|
248
|
+
self.compressor.process(payload).then((result) => {
|
|
248
249
|
if (self.closing) {
|
|
249
250
|
callback(new Error("stream already destroyed"))
|
|
250
251
|
return
|
|
@@ -258,17 +259,14 @@ export default class SpeechFlowNodeA2ACompressor extends SpeechFlowNode {
|
|
|
258
259
|
this.push(chunk)
|
|
259
260
|
callback()
|
|
260
261
|
}).catch((error: unknown) => {
|
|
261
|
-
if (
|
|
262
|
+
if (self.closing)
|
|
263
|
+
callback()
|
|
264
|
+
else
|
|
262
265
|
callback(util.ensureError(error, "compression failed"))
|
|
263
266
|
})
|
|
264
267
|
}
|
|
265
268
|
},
|
|
266
269
|
final (callback) {
|
|
267
|
-
if (self.closing) {
|
|
268
|
-
callback()
|
|
269
|
-
return
|
|
270
|
-
}
|
|
271
|
-
this.push(null)
|
|
272
270
|
callback()
|
|
273
271
|
}
|
|
274
272
|
})
|
|
@@ -33,7 +33,7 @@ class AudioExpander extends util.WebAudio {
|
|
|
33
33
|
private expanderNode: AudioWorkletNode | null = null
|
|
34
34
|
|
|
35
35
|
/* construct object */
|
|
36
|
-
constructor(
|
|
36
|
+
constructor (
|
|
37
37
|
sampleRate: number,
|
|
38
38
|
channels: number,
|
|
39
39
|
config: AudioExpanderConfig = {}
|
|
@@ -71,8 +71,7 @@ class AudioExpander extends util.WebAudio {
|
|
|
71
71
|
|
|
72
72
|
/* configure expander node */
|
|
73
73
|
const currentTime = this.audioContext.currentTime
|
|
74
|
-
const
|
|
75
|
-
const params = node.parameters as Map<string, AudioParam>
|
|
74
|
+
const params = this.expanderNode.parameters as Map<string, AudioParam>
|
|
76
75
|
params.get("threshold")!.setValueAtTime(this.config.thresholdDb, currentTime)
|
|
77
76
|
params.get("floor")!.setValueAtTime(this.config.floorDb, currentTime)
|
|
78
77
|
params.get("ratio")!.setValueAtTime(this.config.ratio, currentTime)
|
|
@@ -86,6 +85,7 @@ class AudioExpander extends util.WebAudio {
|
|
|
86
85
|
this.expanderNode.connect(this.captureNode!)
|
|
87
86
|
}
|
|
88
87
|
|
|
88
|
+
/* destroy object */
|
|
89
89
|
public async destroy (): Promise<void> {
|
|
90
90
|
/* destroy expander node */
|
|
91
91
|
if (this.expanderNode !== null) {
|
|
@@ -164,10 +164,12 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
|
|
|
164
164
|
}
|
|
165
165
|
if (!Buffer.isBuffer(chunk.payload))
|
|
166
166
|
callback(new Error("invalid chunk payload type"))
|
|
167
|
+
else if (self.expander === null)
|
|
168
|
+
callback(new Error("expander not initialized"))
|
|
167
169
|
else {
|
|
168
170
|
/* expand chunk */
|
|
169
171
|
const payload = util.convertBufToI16(chunk.payload)
|
|
170
|
-
self.expander
|
|
172
|
+
self.expander.process(payload).then((result) => {
|
|
171
173
|
if (self.closing) {
|
|
172
174
|
callback(new Error("stream already destroyed"))
|
|
173
175
|
return
|
|
@@ -179,17 +181,14 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
|
|
|
179
181
|
this.push(chunk)
|
|
180
182
|
callback()
|
|
181
183
|
}).catch((error: unknown) => {
|
|
182
|
-
if (
|
|
184
|
+
if (self.closing)
|
|
185
|
+
callback()
|
|
186
|
+
else
|
|
183
187
|
callback(util.ensureError(error, "expansion failed"))
|
|
184
188
|
})
|
|
185
189
|
}
|
|
186
190
|
},
|
|
187
191
|
final (callback) {
|
|
188
|
-
if (self.closing) {
|
|
189
|
-
callback()
|
|
190
|
-
return
|
|
191
|
-
}
|
|
192
|
-
this.push(null)
|
|
193
192
|
callback()
|
|
194
193
|
}
|
|
195
194
|
})
|
|
@@ -13,6 +13,7 @@ import { Duration } from "luxon"
|
|
|
13
13
|
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
14
14
|
import * as util from "./speechflow-util"
|
|
15
15
|
|
|
16
|
+
/* audio gap filler class */
|
|
16
17
|
class AudioFiller extends EventEmitter {
|
|
17
18
|
private emittedEndSamples = 0 /* stream position in samples already emitted */
|
|
18
19
|
private maxInputEndSamples = 0
|
|
@@ -21,6 +22,7 @@ class AudioFiller extends EventEmitter {
|
|
|
21
22
|
private readonly bytesPerFrame: number
|
|
22
23
|
private readonly sampleTolerance = 0.5 /* tolerance for floating-point sample comparisons */
|
|
23
24
|
|
|
25
|
+
/* construct object */
|
|
24
26
|
constructor (private sampleRate = 48000, private channels = 1) {
|
|
25
27
|
super()
|
|
26
28
|
this.bytesPerFrame = this.channels * this.bytesPerSample
|
|
@@ -134,7 +134,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
134
134
|
throw new Error("classifier destroyed during operation")
|
|
135
135
|
|
|
136
136
|
/* check volume level and return "unknown" if too low
|
|
137
|
-
in order to avoid a wrong
|
|
137
|
+
in order to avoid a wrong classification */
|
|
138
138
|
const audioData = {
|
|
139
139
|
sampleRate: sampleRateTarget,
|
|
140
140
|
numberOfChannels: 1,
|
|
@@ -154,8 +154,8 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
154
154
|
const classified = Array.isArray(result) ?
|
|
155
155
|
result as Transformers.AudioClassificationOutput :
|
|
156
156
|
[ result ]
|
|
157
|
-
const c1
|
|
158
|
-
const c2
|
|
157
|
+
const c1 = classified.find((c) => c.label === "male")
|
|
158
|
+
const c2 = classified.find((c) => c.label === "female")
|
|
159
159
|
const male = c1 ? c1.score : 0.0
|
|
160
160
|
const female = c2 ? c2.score : 0.0
|
|
161
161
|
const threshold = this.params.threshold
|
|
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
|
|
|
83
83
|
const chunkData = this.chunkBuffer
|
|
84
84
|
this.chunkBuffer = new Float32Array(0)
|
|
85
85
|
|
|
86
|
-
/* update internal audio sample sliding window for LUFS-
|
|
86
|
+
/* update internal audio sample sliding window for LUFS-M */
|
|
87
87
|
if (chunkData.length > sampleWindow.length)
|
|
88
88
|
sampleWindow.set(chunkData.subarray(chunkData.length - sampleWindow.length), 0)
|
|
89
89
|
else {
|
|
@@ -218,4 +218,4 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
|
|
|
218
218
|
this.stream = null
|
|
219
219
|
}
|
|
220
220
|
}
|
|
221
|
-
}
|
|
221
|
+
}
|
|
@@ -172,12 +172,16 @@ export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
|
|
|
172
172
|
}
|
|
173
173
|
if (!Buffer.isBuffer(chunk.payload))
|
|
174
174
|
callback(new Error("invalid chunk payload type"))
|
|
175
|
+
else if (self.pitchShifter === null)
|
|
176
|
+
callback(new Error("pitch shifter not initialized"))
|
|
175
177
|
else {
|
|
176
178
|
/* shift pitch of audio chunk */
|
|
177
179
|
const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
|
|
178
|
-
self.pitchShifter
|
|
179
|
-
if (self.closing)
|
|
180
|
-
|
|
180
|
+
self.pitchShifter.process(payload).then((result) => {
|
|
181
|
+
if (self.closing) {
|
|
182
|
+
callback(new Error("stream already destroyed"))
|
|
183
|
+
return
|
|
184
|
+
}
|
|
181
185
|
|
|
182
186
|
/* take over pitch-shifted data */
|
|
183
187
|
const payload = util.convertI16ToBuf(result, self.config.audioLittleEndian)
|
|
@@ -185,16 +189,14 @@ export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
|
|
|
185
189
|
this.push(chunk)
|
|
186
190
|
callback()
|
|
187
191
|
}).catch((error: unknown) => {
|
|
188
|
-
|
|
192
|
+
if (self.closing)
|
|
193
|
+
callback()
|
|
194
|
+
else
|
|
195
|
+
callback(util.ensureError(error, "pitch shifting failed"))
|
|
189
196
|
})
|
|
190
197
|
}
|
|
191
198
|
},
|
|
192
199
|
final (callback) {
|
|
193
|
-
if (self.closing) {
|
|
194
|
-
callback()
|
|
195
|
-
return
|
|
196
|
-
}
|
|
197
|
-
this.push(null)
|
|
198
200
|
callback()
|
|
199
201
|
}
|
|
200
202
|
})
|
|
@@ -48,6 +48,7 @@ parentPort!.on("message", (msg) => {
|
|
|
48
48
|
for (let i = 0; i < data.length; i++)
|
|
49
49
|
i16[i] = Math.round(f32a[i])
|
|
50
50
|
|
|
51
|
+
/* send processed frame back to parent */
|
|
51
52
|
parentPort!.postMessage({ type: "process-done", id, data: i16 }, [ i16.buffer ])
|
|
52
53
|
}
|
|
53
54
|
else if (msg.type === "close") {
|
|
@@ -93,7 +93,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
|
|
|
93
93
|
return segment
|
|
94
94
|
const id = `${seq++}`
|
|
95
95
|
return new Promise<Int16Array<ArrayBuffer>>((resolve) => {
|
|
96
|
-
pending.set(id, (segment
|
|
96
|
+
pending.set(id, (segment) => { resolve(segment) })
|
|
97
97
|
this.worker!.postMessage({ type: "process", id, data: segment }, [ segment.buffer ])
|
|
98
98
|
})
|
|
99
99
|
}
|
|
@@ -32,7 +32,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
|
|
|
32
32
|
|
|
33
33
|
/* declare node configuration parameters */
|
|
34
34
|
this.configure({
|
|
35
|
-
attenuate: { type: "number", val: -18, pos: 0, match: (n: number) => n >= -60 && n <= 0 }
|
|
35
|
+
attenuate: { type: "number", val: -18, pos: 0, match: (n: number) => n >= -60 && n <= 0 }
|
|
36
36
|
})
|
|
37
37
|
|
|
38
38
|
/* declare node input/output format */
|
|
@@ -53,7 +53,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
|
|
|
53
53
|
const wasmBinary = await fs.promises.readFile(
|
|
54
54
|
path.join(__dirname, "../node_modules/@sapphi-red/speex-preprocess-wasm/dist/speex.wasm"))
|
|
55
55
|
const speexModule = await loadSpeexModule({
|
|
56
|
-
wasmBinary: wasmBinary.buffer
|
|
56
|
+
wasmBinary: wasmBinary.buffer
|
|
57
57
|
})
|
|
58
58
|
this.speexProcessor = new SpeexPreprocessor(
|
|
59
59
|
speexModule, this.sampleSize, this.config.audioSampleRate)
|
|
@@ -85,7 +85,9 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
|
|
|
85
85
|
util.processInt16ArrayInSegments(payload, self.sampleSize, (segment) => {
|
|
86
86
|
if (self.closing)
|
|
87
87
|
throw new Error("stream already destroyed")
|
|
88
|
-
self.speexProcessor
|
|
88
|
+
if (self.speexProcessor === null)
|
|
89
|
+
throw new Error("speex processor not initialized")
|
|
90
|
+
self.speexProcessor.processInt16(segment)
|
|
89
91
|
return Promise.resolve(segment)
|
|
90
92
|
}).then((payload: Int16Array<ArrayBuffer>) => {
|
|
91
93
|
if (self.closing)
|
|
@@ -85,6 +85,18 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
85
85
|
}
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
+
/* helper function for tail timer handling */
|
|
89
|
+
const startTailTimer = () => {
|
|
90
|
+
tail = true
|
|
91
|
+
clearTailTimer()
|
|
92
|
+
this.tailTimer = setTimeout(() => {
|
|
93
|
+
if (this.closing || this.tailTimer === null)
|
|
94
|
+
return
|
|
95
|
+
tail = false
|
|
96
|
+
this.tailTimer = null
|
|
97
|
+
}, this.params.postSpeechTail)
|
|
98
|
+
}
|
|
99
|
+
|
|
88
100
|
/* establish Voice Activity Detection (VAD) facility */
|
|
89
101
|
let tail = false
|
|
90
102
|
try {
|
|
@@ -111,31 +123,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
111
123
|
return
|
|
112
124
|
const duration = util.audioArrayDuration(audio, vadSampleRateTarget)
|
|
113
125
|
this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
|
|
114
|
-
if (this.params.mode === "unplugged")
|
|
115
|
-
|
|
116
|
-
clearTailTimer()
|
|
117
|
-
this.tailTimer = setTimeout(() => {
|
|
118
|
-
if (this.closing || this.tailTimer === null)
|
|
119
|
-
return
|
|
120
|
-
tail = false
|
|
121
|
-
this.tailTimer = null
|
|
122
|
-
}, this.params.postSpeechTail)
|
|
123
|
-
}
|
|
126
|
+
if (this.params.mode === "unplugged")
|
|
127
|
+
startTailTimer()
|
|
124
128
|
},
|
|
125
129
|
onVADMisfire: () => {
|
|
126
130
|
if (this.closing)
|
|
127
131
|
return
|
|
128
132
|
this.log("info", "VAD: speech end (segment too short)")
|
|
129
|
-
if (this.params.mode === "unplugged")
|
|
130
|
-
|
|
131
|
-
clearTailTimer()
|
|
132
|
-
this.tailTimer = setTimeout(() => {
|
|
133
|
-
if (this.closing || this.tailTimer === null)
|
|
134
|
-
return
|
|
135
|
-
tail = false
|
|
136
|
-
this.tailTimer = null
|
|
137
|
-
}, this.params.postSpeechTail)
|
|
138
|
-
}
|
|
133
|
+
if (this.params.mode === "unplugged")
|
|
134
|
+
startTailTimer()
|
|
139
135
|
},
|
|
140
136
|
onFrameProcessed: (audio) => {
|
|
141
137
|
if (this.closing)
|
|
@@ -144,7 +140,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
144
140
|
/* annotate the current audio segment */
|
|
145
141
|
const element = this.queueVAD.peek()
|
|
146
142
|
if (element === undefined || element.type !== "audio-frame")
|
|
147
|
-
throw new Error("internal error
|
|
143
|
+
throw new Error("internal error that cannot happen: no more queued element")
|
|
148
144
|
if (element.segmentIdx >= element.segmentData.length)
|
|
149
145
|
throw new Error("segment index out of bounds")
|
|
150
146
|
const segment = element.segmentData[element.segmentIdx++]
|
|
@@ -227,6 +223,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
227
223
|
}
|
|
228
224
|
}
|
|
229
225
|
|
|
226
|
+
/* signal completion */
|
|
230
227
|
callback()
|
|
231
228
|
}
|
|
232
229
|
catch (error) {
|
|
@@ -322,6 +319,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
322
319
|
}
|
|
323
320
|
}
|
|
324
321
|
|
|
322
|
+
/* peek at send queue element */
|
|
325
323
|
const element = self.queueSend.peek()
|
|
326
324
|
if (element !== undefined && element.type === "audio-eof")
|
|
327
325
|
this.push(null)
|
|
@@ -371,8 +369,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
371
369
|
if (this.vad !== null) {
|
|
372
370
|
try {
|
|
373
371
|
const flushPromise = this.vad.flush()
|
|
374
|
-
const timeoutPromise = new Promise((resolve) =>
|
|
375
|
-
setTimeout(resolve, 5000))
|
|
372
|
+
const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
|
|
376
373
|
await Promise.race([ flushPromise, timeoutPromise ])
|
|
377
374
|
}
|
|
378
375
|
catch (error) {
|
|
@@ -21,15 +21,18 @@ const writeWavHeader = (
|
|
|
21
21
|
const sampleRate = options?.sampleRate ?? 44100 /* 44KHz */
|
|
22
22
|
const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
|
|
23
23
|
|
|
24
|
+
/* determine header dimensions */
|
|
24
25
|
const headerLength = 44
|
|
25
26
|
const maxDataSize = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
|
|
26
27
|
const dataLength = length ?? maxDataSize
|
|
27
28
|
const fileSize = dataLength + headerLength
|
|
28
29
|
const header = Buffer.alloc(headerLength)
|
|
29
30
|
|
|
31
|
+
/* calculate byte rate and block alignment */
|
|
30
32
|
const byteRate = (sampleRate * channels * bitDepth) / 8
|
|
31
33
|
const blockAlign = (channels * bitDepth) / 8
|
|
32
34
|
|
|
35
|
+
/* write header fields */
|
|
33
36
|
let offset = 0
|
|
34
37
|
header.write("RIFF", offset); offset += 4
|
|
35
38
|
header.writeUInt32LE(fileSize - 8, offset); offset += 4
|
|
@@ -45,6 +48,7 @@ const writeWavHeader = (
|
|
|
45
48
|
header.write("data", offset); offset += 4
|
|
46
49
|
header.writeUInt32LE(dataLength, offset); offset += 4
|
|
47
50
|
|
|
51
|
+
/* return completed header */
|
|
48
52
|
return header
|
|
49
53
|
}
|
|
50
54
|
|
|
@@ -53,6 +57,7 @@ const readWavHeader = (buffer: Buffer) => {
|
|
|
53
57
|
if (buffer.length < 44)
|
|
54
58
|
throw new Error("WAV header too short, expected at least 44 bytes")
|
|
55
59
|
|
|
60
|
+
/* read header fields */
|
|
56
61
|
let offset = 0
|
|
57
62
|
const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
|
|
58
63
|
const fileSize = buffer.readUInt32LE(offset); offset += 4
|
|
@@ -68,6 +73,7 @@ const readWavHeader = (buffer: Buffer) => {
|
|
|
68
73
|
const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
|
|
69
74
|
const dataLength = buffer.readUInt32LE(offset); offset += 4
|
|
70
75
|
|
|
76
|
+
/* validate RIFF header */
|
|
71
77
|
if (riffHead !== "RIFF")
|
|
72
78
|
throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
|
|
73
79
|
if (waveHead !== "WAVE")
|
|
@@ -77,6 +83,7 @@ const readWavHeader = (buffer: Buffer) => {
|
|
|
77
83
|
if (data !== "data")
|
|
78
84
|
throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
|
|
79
85
|
|
|
86
|
+
/* return parsed header data */
|
|
80
87
|
return {
|
|
81
88
|
riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
|
|
82
89
|
channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
|
|
@@ -53,7 +53,7 @@ class AsyncQueue<T> {
|
|
|
53
53
|
continue
|
|
54
54
|
}
|
|
55
55
|
else {
|
|
56
|
-
const it = await new Promise<IteratorResult<T>>((resolve) => this.resolvers.push(resolve))
|
|
56
|
+
const it = await new Promise<IteratorResult<T>>((resolve) => { this.resolvers.push(resolve) })
|
|
57
57
|
if (it.done)
|
|
58
58
|
return
|
|
59
59
|
yield it.value
|
|
@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
68
68
|
public static name = "a2t-amazon"
|
|
69
69
|
|
|
70
70
|
/* internal state */
|
|
71
|
-
private client:
|
|
72
|
-
private clientStream:
|
|
73
|
-
private closing
|
|
74
|
-
private
|
|
75
|
-
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
71
|
+
private client: TranscribeStreamingClient | null = null
|
|
72
|
+
private clientStream: AsyncIterable<TranscriptResultStream> | null = null
|
|
73
|
+
private closing = false
|
|
74
|
+
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
76
75
|
|
|
77
76
|
/* construct node */
|
|
78
77
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -126,8 +125,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
126
125
|
secretAccessKey: this.params.secKey
|
|
127
126
|
}
|
|
128
127
|
})
|
|
129
|
-
if (this.client === null)
|
|
130
|
-
throw new Error("failed to establish Amazon Transcribe client")
|
|
131
128
|
|
|
132
129
|
/* create an AudioStream for Amazon Transcribe */
|
|
133
130
|
const audioQueue = new AsyncQueue<Uint8Array>()
|
|
@@ -236,11 +233,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
236
233
|
callback()
|
|
237
234
|
return
|
|
238
235
|
}
|
|
239
|
-
|
|
240
|
-
/* await all read operations */
|
|
241
236
|
await reads.awaitAll()
|
|
242
|
-
|
|
243
|
-
util.run(
|
|
237
|
+
util.run("closing Amazon Transcribe connection",
|
|
244
238
|
() => self.client!.destroy(),
|
|
245
239
|
(error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
|
|
246
240
|
)
|
|
@@ -279,12 +273,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
279
273
|
/* indicate closing first to stop all async operations */
|
|
280
274
|
this.closing = true
|
|
281
275
|
|
|
282
|
-
/* cleanup all timers */
|
|
283
|
-
if (this.connectionTimeout !== null) {
|
|
284
|
-
clearTimeout(this.connectionTimeout)
|
|
285
|
-
this.connectionTimeout = null
|
|
286
|
-
}
|
|
287
|
-
|
|
288
276
|
/* close queue */
|
|
289
277
|
if (this.queue !== null) {
|
|
290
278
|
this.queue.write(null)
|
|
@@ -22,11 +22,10 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
22
22
|
public static name = "a2t-google"
|
|
23
23
|
|
|
24
24
|
/* internal state */
|
|
25
|
-
private client:
|
|
26
|
-
private recognizeStream:
|
|
27
|
-
private
|
|
28
|
-
private
|
|
29
|
-
private closing = false
|
|
25
|
+
private client: GoogleSpeech.SpeechClient | null = null
|
|
26
|
+
private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
|
|
27
|
+
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
28
|
+
private closing = false
|
|
30
29
|
|
|
31
30
|
/* construct node */
|
|
32
31
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -280,12 +279,6 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
280
279
|
/* indicate closing first to stop all async operations */
|
|
281
280
|
this.closing = true
|
|
282
281
|
|
|
283
|
-
/* cleanup all timers */
|
|
284
|
-
if (this.connectionTimeout !== null) {
|
|
285
|
-
clearTimeout(this.connectionTimeout)
|
|
286
|
-
this.connectionTimeout = null
|
|
287
|
-
}
|
|
288
|
-
|
|
289
282
|
/* shutdown stream */
|
|
290
283
|
if (this.stream !== null) {
|
|
291
284
|
await util.destroyStream(this.stream)
|
|
@@ -23,12 +23,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
23
23
|
public static name = "a2t-openai"
|
|
24
24
|
|
|
25
25
|
/* internal state */
|
|
26
|
-
private openai: OpenAI
|
|
27
|
-
private ws: ws.WebSocket
|
|
28
|
-
private queue: util.SingleQueue<SpeechFlowChunk | null>
|
|
29
|
-
private resampler: SpeexResampler
|
|
30
|
-
private closing
|
|
31
|
-
private connectionTimeout: ReturnType<typeof setTimeout>
|
|
26
|
+
private openai: OpenAI | null = null
|
|
27
|
+
private ws: ws.WebSocket | null = null
|
|
28
|
+
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
29
|
+
private resampler: SpeexResampler | null = null
|
|
30
|
+
private closing = false
|
|
31
|
+
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
32
32
|
|
|
33
33
|
/* construct node */
|
|
34
34
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -150,6 +150,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
150
150
|
})
|
|
151
151
|
this.ws.on("error", (err) => {
|
|
152
152
|
this.log("error", `WebSocket connection error: ${err}`)
|
|
153
|
+
if (!this.closing && this.queue !== null)
|
|
154
|
+
this.queue.write(null)
|
|
155
|
+
this.emit("error", err)
|
|
153
156
|
})
|
|
154
157
|
|
|
155
158
|
/* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
|
|
@@ -164,6 +167,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
164
167
|
}, new Map<string, any>())
|
|
165
168
|
}
|
|
166
169
|
|
|
170
|
+
/* track transcription text */
|
|
167
171
|
let text = ""
|
|
168
172
|
this.ws.on("message", (data) => {
|
|
169
173
|
let ev: any
|
|
@@ -353,7 +357,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
353
357
|
this.ws.close()
|
|
354
358
|
this.ws = null
|
|
355
359
|
}
|
|
356
|
-
this.openai
|
|
360
|
+
if (this.openai !== null)
|
|
361
|
+
this.openai = null
|
|
357
362
|
|
|
358
363
|
/* close resampler */
|
|
359
364
|
this.resampler = null
|