speechflow 1.7.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +388 -120
- package/etc/claude.md +5 -5
- package/etc/speechflow.yaml +2 -2
- package/package.json +3 -3
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +1 -0
- package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-main-graph.js +30 -9
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-nodes.js +1 -0
- package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +7 -9
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +11 -9
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +19 -22
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +31 -4
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +2 -11
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +314 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +27 -7
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -3
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +215 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +27 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +192 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +619 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +0 -2
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +161 -0
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
- package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +48 -100
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
- package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +224 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-file.js +78 -67
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +502 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +9 -9
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +8 -5
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-error.js +5 -0
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
- package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-queue.js +2 -1
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util.js +2 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/package.json +35 -18
- package/speechflow-cli/src/lib.d.ts +5 -0
- package/speechflow-cli/src/speechflow-main-api.ts +16 -16
- package/speechflow-cli/src/speechflow-main-cli.ts +1 -0
- package/speechflow-cli/src/speechflow-main-graph.ts +38 -14
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -10
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +9 -10
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +11 -9
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +5 -3
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +20 -23
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +31 -4
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -18
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +315 -0
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +32 -10
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -4
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +203 -0
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +33 -10
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +176 -0
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +710 -0
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +3 -4
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +137 -0
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +188 -0
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +8 -8
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -0
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +211 -0
- package/speechflow-cli/src/speechflow-node-xio-file.ts +91 -80
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +535 -0
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
- package/speechflow-cli/src/speechflow-util-audio.ts +10 -5
- package/speechflow-cli/src/speechflow-util-error.ts +9 -0
- package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
- package/speechflow-cli/src/speechflow-util-queue.ts +3 -3
- package/speechflow-cli/src/speechflow-util.ts +2 -0
- package/speechflow-ui-db/package.json +9 -9
- package/speechflow-ui-st/package.json +9 -9
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -247
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
package/speechflow-cli/src/speechflow-node-a2a-vad.ts

@@ -85,6 +85,18 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode
             }
         }
 
+        /* helper function for tail timer handling */
+        const startTailTimer = () => {
+            tail = true
+            clearTailTimer()
+            this.tailTimer = setTimeout(() => {
+                if (this.closing || this.tailTimer === null)
+                    return
+                tail = false
+                this.tailTimer = null
+            }, this.params.postSpeechTail)
+        }
+
         /* establish Voice Activity Detection (VAD) facility */
         let tail = false
         try {
@@ -111,31 +123,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode
                     return
                 const duration = util.audioArrayDuration(audio, vadSampleRateTarget)
                 this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
-                if (this.params.mode === "unplugged")
-
-                    clearTailTimer()
-                    this.tailTimer = setTimeout(() => {
-                        if (this.closing || this.tailTimer === null)
-                            return
-                        tail = false
-                        this.tailTimer = null
-                    }, this.params.postSpeechTail)
-                }
+                if (this.params.mode === "unplugged")
+                    startTailTimer()
             },
             onVADMisfire: () => {
                 if (this.closing)
                     return
                 this.log("info", "VAD: speech end (segment too short)")
-                if (this.params.mode === "unplugged")
-
-                    clearTailTimer()
-                    this.tailTimer = setTimeout(() => {
-                        if (this.closing || this.tailTimer === null)
-                            return
-                        tail = false
-                        this.tailTimer = null
-                    }, this.params.postSpeechTail)
-                }
+                if (this.params.mode === "unplugged")
+                    startTailTimer()
             },
             onFrameProcessed: (audio) => {
                 if (this.closing)
@@ -144,7 +140,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode
                 /* annotate the current audio segment */
                 const element = this.queueVAD.peek()
                 if (element === undefined || element.type !== "audio-frame")
-                    throw new Error("internal error
+                    throw new Error("internal error that cannot happen: no more queued element")
                 if (element.segmentIdx >= element.segmentData.length)
                     throw new Error("segment index out of bounds")
                 const segment = element.segmentData[element.segmentIdx++]
@@ -227,6 +223,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode
                     }
                 }
 
+                /* signal completion */
                 callback()
             }
             catch (error) {
@@ -322,6 +319,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode
                 }
             }
 
+            /* peek at send queue element */
             const element = self.queueSend.peek()
             if (element !== undefined && element.type === "audio-eof")
                 this.push(null)
@@ -371,8 +369,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode
         if (this.vad !== null) {
             try {
                 const flushPromise = this.vad.flush()
-                const timeoutPromise = new Promise((resolve) =>
-                    setTimeout(resolve, 5000))
+                const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
                 await Promise.race([ flushPromise, timeoutPromise ])
             }
             catch (error) {
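The VAD change above deduplicates the post-speech tail handling of `onSpeechEnd` and `onVADMisfire` into a single `startTailTimer` helper. A minimal standalone sketch of that debounce pattern follows; the `postSpeechTail` value of 1500 ms is an assumed placeholder, the node takes it from its parameters.

```ts
/* sketch of the post-speech tail debounce: keep `tail` raised for
   `postSpeechTail` ms after the last speech-end/misfire event and
   restart the timer whenever a new event arrives */
let tail = false
let tailTimer: ReturnType<typeof setTimeout> | null = null
const postSpeechTail = 1500 /* assumed value in milliseconds */

const clearTailTimer = () => {
    if (tailTimer !== null) {
        clearTimeout(tailTimer)
        tailTimer = null
    }
}
const startTailTimer = () => {
    tail = true
    clearTailTimer()
    tailTimer = setTimeout(() => {
        tail = false
        tailTimer = null
    }, postSpeechTail)
}
```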
package/speechflow-cli/src/speechflow-node-a2a-wav.ts

@@ -21,15 +21,18 @@ const writeWavHeader = (
     const sampleRate = options?.sampleRate ?? 44100 /* 44KHz */
     const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
 
+    /* determine header dimensions */
     const headerLength = 44
     const maxDataSize = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
     const dataLength = length ?? maxDataSize
     const fileSize = dataLength + headerLength
     const header = Buffer.alloc(headerLength)
 
+    /* calculate byte rate and block alignment */
     const byteRate = (sampleRate * channels * bitDepth) / 8
     const blockAlign = (channels * bitDepth) / 8
 
+    /* write header fields */
     let offset = 0
     header.write("RIFF", offset); offset += 4
     header.writeUInt32LE(fileSize - 8, offset); offset += 4
@@ -45,6 +48,7 @@ const writeWavHeader = (
     header.write("data", offset); offset += 4
     header.writeUInt32LE(dataLength, offset); offset += 4
 
+    /* return completed header */
     return header
 }
 
@@ -53,6 +57,7 @@ const readWavHeader = (buffer: Buffer) => {
     if (buffer.length < 44)
         throw new Error("WAV header too short, expected at least 44 bytes")
 
+    /* read header fields */
     let offset = 0
     const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
     const fileSize = buffer.readUInt32LE(offset); offset += 4
@@ -68,6 +73,7 @@ const readWavHeader = (buffer: Buffer) => {
     const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
     const dataLength = buffer.readUInt32LE(offset); offset += 4
 
+    /* validate RIFF header */
     if (riffHead !== "RIFF")
         throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
     if (waveHead !== "WAVE")
@@ -77,6 +83,7 @@ const readWavHeader = (buffer: Buffer) => {
     if (data !== "data")
         throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
 
+    /* return parsed header data */
     return {
         riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
         channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
@@ -94,7 +101,8 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode
 
         /* declare node configuration parameters */
         this.configure({
-            mode:
+            mode: { type: "string", pos: 0, val: "encode", match: /^(?:encode|decode)$/ },
+            seekable: { type: "boolean", pos: 1, val: false }
         })
 
         /* declare node input/output format */
@@ -106,7 +114,9 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode
     async open () {
         /* establish a transform stream */
         const self = this
-        let
+        let isFirstChunk = true
+        let headerChunkSent: SpeechFlowChunk | null = null
+        let totalSize = 0
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
@@ -115,7 +125,7 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode
             transform (chunk: SpeechFlowChunk, encoding, callback) {
                 if (!Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
-                else if (
+                else if (isFirstChunk) {
                     if (self.params.mode === "encode") {
                         /* convert raw/PCM to WAV/PCM
                            (NOTICE: as this is a continuous stream, the
@@ -132,7 +142,9 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode
                         const headerChunk = chunk.clone()
                         headerChunk.payload = headerBuffer
                         this.push(headerChunk)
+                        headerChunkSent = headerChunk
                         this.push(chunk)
+                        totalSize += chunk.payload.byteLength
                         callback()
                     }
                     else if (self.params.mode === "decode") {
@@ -173,21 +185,36 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode
                         }
                         chunk.payload = chunk.payload.subarray(44)
                         this.push(chunk)
+                        totalSize += chunk.payload.byteLength
                         callback()
                     }
                     else {
                         callback(new Error(`invalid operation mode "${self.params.mode}"`))
                         return
                     }
-
+                    isFirstChunk = false
                 }
                 else {
                     /* pass-through original chunk */
                     this.push(chunk)
+                    totalSize += chunk.payload.byteLength
                     callback()
                 }
             },
             final (callback) {
+                if (self.params.seekable && headerChunkSent !== null) {
+                    self.log("info", "sending updated WAV header")
+                    const headerBuffer = writeWavHeader(totalSize, {
+                        audioFormat: 0x0001 /* PCM */,
+                        channels: self.config.audioChannels,
+                        sampleRate: self.config.audioSampleRate,
+                        bitDepth: self.config.audioBitDepth
+                    })
+                    const headerChunk = headerChunkSent?.clone()
+                    headerChunk.payload = headerBuffer
+                    headerChunk.meta.set("chunk:seek", 0)
+                    this.push(headerChunk)
+                }
                 callback()
             }
         })
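For reference, the 44-byte header that `writeWavHeader()` produces follows the standard RIFF/WAVE layout, and the field order matches what `readWavHeader()` parses back. A minimal self-contained sketch of the same field arithmetic; the channel/rate/depth defaults below are illustrative only, not the node's configuration.

```ts
import { Buffer } from "node:buffer"

/* sketch of the 44-byte PCM WAV header arithmetic */
const wavHeader = (dataLength: number, channels = 1, sampleRate = 48000, bitDepth = 16): Buffer => {
    const byteRate   = (sampleRate * channels * bitDepth) / 8   /* bytes per second of audio */
    const blockAlign = (channels * bitDepth) / 8                /* bytes per sample frame    */
    const header = Buffer.alloc(44)
    header.write("RIFF", 0)                                     /* RIFF chunk id             */
    header.writeUInt32LE(dataLength + 44 - 8, 4)                /* file size minus 8 bytes   */
    header.write("WAVE", 8)                                     /* RIFF form type            */
    header.write("fmt ", 12)                                    /* format sub-chunk id       */
    header.writeUInt32LE(16, 16)                                /* format sub-chunk length   */
    header.writeUInt16LE(0x0001, 20)                            /* audio format: PCM         */
    header.writeUInt16LE(channels, 22)
    header.writeUInt32LE(sampleRate, 24)
    header.writeUInt32LE(byteRate, 28)
    header.writeUInt16LE(blockAlign, 32)
    header.writeUInt16LE(bitDepth, 34)
    header.write("data", 36)                                    /* data sub-chunk id         */
    header.writeUInt32LE(dataLength, 40)                        /* data sub-chunk length     */
    return header
}
```

With the new `seekable` parameter enabled, the node re-emits this header from `final()` using the accumulated `totalSize` and tags the chunk with `chunk:seek = 0`, so a seekable sink can overwrite the provisional header written at stream start with the real data length.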
package/speechflow-cli/src/speechflow-node-a2t-amazon.ts

@@ -53,7 +53,7 @@ class AsyncQueue<T> {
                 continue
             }
             else {
-                const it = await new Promise<IteratorResult<T>>((resolve) => this.resolvers.push(resolve))
+                const it = await new Promise<IteratorResult<T>>((resolve) => { this.resolvers.push(resolve) })
                 if (it.done)
                     return
                 yield it.value
@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode
     public static name = "a2t-amazon"
 
     /* internal state */
-    private client:
-    private clientStream:
-    private closing
-    private
-    private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
+    private client: TranscribeStreamingClient | null = null
+    private clientStream: AsyncIterable<TranscriptResultStream> | null = null
+    private closing = false
+    private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
 
     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -126,8 +125,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode
                 secretAccessKey: this.params.secKey
             }
         })
-        if (this.client === null)
-            throw new Error("failed to establish Amazon Transcribe client")
 
         /* create an AudioStream for Amazon Transcribe */
         const audioQueue = new AsyncQueue<Uint8Array>()
@@ -236,11 +233,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode
                     callback()
                     return
                 }
-
-                /* await all read operations */
                 await reads.awaitAll()
-
-                util.run(
+                util.run("closing Amazon Transcribe connection",
                     () => self.client!.destroy(),
                     (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
                 )
@@ -279,12 +273,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode
         /* indicate closing first to stop all async operations */
         this.closing = true
 
-        /* cleanup all timers */
-        if (this.connectionTimeout !== null) {
-            clearTimeout(this.connectionTimeout)
-            this.connectionTimeout = null
-        }
-
         /* close queue */
         if (this.queue !== null) {
             this.queue.write(null)
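The one-line `AsyncQueue` change only wraps the resolver registration in a block body so the arrow returns `void`, but the surrounding pattern is what feeds audio to Amazon Transcribe: the node writes PCM buffers into an `AsyncQueue<Uint8Array>` and the SDK consumes it as an async iterable. A hedged sketch of such a promise-backed queue, reconstructed from the fragment visible above rather than from the package's actual class:

```ts
/* illustrative promise-backed async queue in the style of AsyncQueue<T> */
class SimpleAsyncQueue<T> {
    private items: IteratorResult<T>[] = []
    private resolvers: ((it: IteratorResult<T>) => void)[] = []
    push (value: T) { this.dispatch({ done: false, value }) }           /* enqueue an item      */
    finish ()       { this.dispatch({ done: true, value: undefined }) } /* signal end of stream */
    private dispatch (it: IteratorResult<T>) {
        const resolve = this.resolvers.shift()
        if (resolve) resolve(it)      /* hand directly to a waiting reader */
        else this.items.push(it)      /* otherwise buffer it               */
    }
    async *[Symbol.asyncIterator] () {
        while (true) {
            const it = this.items.length > 0
                ? this.items.shift()!
                : await new Promise<IteratorResult<T>>((resolve) => { this.resolvers.push(resolve) })
            if (it.done)
                return
            yield it.value
        }
    }
}
```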
package/speechflow-cli/src/speechflow-node-a2t-google.ts (new file)

@@ -0,0 +1,315 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import * as GoogleSpeech from "@google-cloud/speech"
+import { DateTime, Duration } from "luxon"
+import * as arktype from "arktype"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as util from "./speechflow-util"
+
+/* SpeechFlow node for Google Cloud speech-to-text conversion */
+export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "a2t-google"
+
+    /* internal state */
+    private client: GoogleSpeech.SpeechClient | null = null
+    private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
+    private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
+    private closing = false
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            key: { type: "string", val: process.env.SPEECHFLOW_GOOGLE_KEY ?? "" },
+            model: { type: "string", pos: 0, val: "latest_long" },
+            language: { type: "string", pos: 1, val: "en-US" },
+            interim: { type: "boolean", pos: 2, val: false }
+        })
+
+        /* validate API key */
+        if (this.params.key === "")
+            throw new Error("Google Cloud API credentials JSON key is required")
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "text"
+    }
+
+    /* one-time status of node */
+    async status () {
+        return {}
+    }
+
+    /* open node */
+    async open () {
+        /* sanity check situation */
+        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+            throw new Error("Google Speech node currently supports PCM-S16LE audio only")
+
+        /* clear destruction flag */
+        this.closing = false
+
+        /* create queue for results */
+        this.queue = new util.SingleQueue<SpeechFlowChunk | null>()
+
+        /* create a store for the meta information */
+        const metastore = new util.TimeStore<Map<string, any>>()
+
+        /* instantiate Google Speech client */
+        const data = util.run("Google Cloud API credentials key", () =>
+            JSON.parse(this.params.key))
+        const credentials = util.importObject("Google Cloud API credentials key",
+            data,
+            arktype.type({
+                project_id: "string",
+                private_key: "string",
+                client_email: "string"
+            })
+        )
+        this.client = new GoogleSpeech.SpeechClient({
+            credentials: {
+                private_key: credentials.private_key,
+                client_email: credentials.client_email
+            },
+            projectId: credentials.project_id
+        })
+
+        /* create streaming recognition request */
+        this.recognizeStream = this.client.streamingRecognize({
+            config: {
+                encoding: "LINEAR16",
+                sampleRateHertz: this.config.audioSampleRate,
+                languageCode: this.params.language,
+                model: this.params.model,
+                enableAutomaticPunctuation: true,
+                enableWordTimeOffsets: true
+            },
+            interimResults: this.params.interim
+        })
+
+        /* hook onto Google Speech API events */
+        this.recognizeStream.on("data", (data: GoogleSpeech.protos.google.cloud.speech.v1.IStreamingRecognizeResponse) => {
+            if (this.closing || this.queue === null)
+                return
+            if (!data.results || data.results.length === 0)
+                return
+            for (const result of data.results) {
+                if (!result.alternatives || result.alternatives.length === 0)
+                    continue
+                const alternative = result.alternatives[0]
+                const text = alternative.transcript ?? ""
+                if (text === "")
+                    continue
+                const isFinal = result.isFinal ?? false
+                if (!isFinal && !this.params.interim)
+                    continue
+
+                /* calculate timestamps */
+                let tsStart = Duration.fromMillis(0)
+                let tsEnd = Duration.fromMillis(0)
+
+                /* extract word timing information if available */
+                const words: { word: string, start: Duration, end: Duration }[] = []
+                if (alternative.words && alternative.words.length > 0) {
+                    for (const wordInfo of alternative.words) {
+                        const wordStart = wordInfo.startTime
+                            ? Duration.fromMillis(
+                                (Number(wordInfo.startTime.seconds ?? 0) * 1000) +
+                                (Number(wordInfo.startTime.nanos ?? 0) / 1000000)
+                            ).plus(this.timeZeroOffset)
+                            : Duration.fromMillis(0)
+                        const wordEnd = wordInfo.endTime
+                            ? Duration.fromMillis(
+                                (Number(wordInfo.endTime.seconds ?? 0) * 1000) +
+                                (Number(wordInfo.endTime.nanos ?? 0) / 1000000)
+                            ).plus(this.timeZeroOffset)
+                            : Duration.fromMillis(0)
+                        words.push({
+                            word: wordInfo.word ?? "",
+                            start: wordStart,
+                            end: wordEnd
+                        })
+                    }
+                    if (words.length > 0) {
+                        tsStart = words[0].start
+                        tsEnd = words[words.length - 1].end
+                    }
+                }
+                else {
+                    /* fallback: use result timing */
+                    const resultEnd = result.resultEndTime
+                    if (resultEnd) {
+                        tsEnd = Duration.fromMillis(
+                            (Number(resultEnd.seconds ?? 0) * 1000) +
+                            (Number(resultEnd.nanos ?? 0) / 1000000)
+                        ).plus(this.timeZeroOffset)
+                    }
+                }
+                this.log("info", `text received (start: ${tsStart.toMillis()}ms, ` +
+                    `end: ${tsEnd.toMillis()}ms, ` +
+                    `kind: ${isFinal ? "final" : "intermediate"}): ` +
+                    `"${text}"`)
+
+                /* fetch and merge meta information */
+                const metas = metastore.fetch(tsStart, tsEnd)
+                const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+                    curr.forEach((val, key) => { prev.set(key, val) })
+                    return prev
+                }, new Map<string, any>())
+                metastore.prune(tsStart)
+
+                /* add word timing to meta */
+                if (words.length > 0)
+                    meta.set("words", words)
+
+                /* create and enqueue chunk */
+                const chunk = new SpeechFlowChunk(tsStart, tsEnd,
+                    isFinal ? "final" : "intermediate", "text", text, meta)
+                this.queue.write(chunk)
+            }
+        })
+        this.recognizeStream.on("error", (error: Error) => {
+            this.log("error", `error: ${error.message}`)
+            if (!this.closing && this.queue !== null)
+                this.queue.write(null)
+            this.emit("error", error)
+        })
+        this.recognizeStream.on("end", () => {
+            this.log("info", "stream ended")
+            if (!this.closing && this.queue !== null)
+                this.queue.write(null)
+        })
+
+        /* remember opening time to receive time zero offset */
+        this.timeOpen = DateTime.now()
+
+        /* provide Duplex stream and internally attach to Google Speech API */
+        const self = this
+        const reads = new util.PromiseSet<void>()
+        this.stream = new Stream.Duplex({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+            write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.closing || self.recognizeStream === null) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (chunk.type !== "audio")
+                    callback(new Error("expected audio input chunk"))
+                else if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("expected Buffer input chunk"))
+                else {
+                    if (chunk.payload.byteLength > 0) {
+                        self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
+                        if (chunk.meta.size > 0)
+                            metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
+                        try {
+                            self.recognizeStream.write(chunk.payload)
+                        }
+                        catch (error) {
+                            callback(util.ensureError(error, "failed to send to Google Speech"))
+                            return
+                        }
+                    }
+                    callback()
+                }
+            },
+            async final (callback) {
+                /* short-circuiting in case of own closing */
+                if (self.closing || self.recognizeStream === null) {
+                    callback()
+                    return
+                }
+
+                /* close Google Speech stream */
+                try {
+                    self.recognizeStream.end()
+                }
+                catch (error) {
+                    self.log("warning", `error closing Google Speech stream: ${error}`)
+                }
+
+                /* await all read operations */
+                await reads.awaitAll()
+                callback()
+            },
+            read (size) {
+                if (self.closing || self.queue === null) {
+                    this.push(null)
+                    return
+                }
+                reads.add(self.queue.read().then((chunk) => {
+                    if (self.closing || self.queue === null) {
+                        this.push(null)
+                        return
+                    }
+                    if (chunk === null) {
+                        self.log("info", "received EOF signal")
+                        this.push(null)
+                    }
+                    else {
+                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
+                        this.push(chunk)
+                    }
+                }).catch((error: unknown) => {
+                    if (!self.closing && self.queue !== null)
+                        self.log("error", `queue read error: ${util.ensureError(error).message}`)
+                }))
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate closing first to stop all async operations */
+        this.closing = true
+
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await util.destroyStream(this.stream)
+            this.stream = null
+        }
+
+        /* close Google Speech stream and client */
+        if (this.recognizeStream !== null) {
+            try {
+                this.recognizeStream.removeAllListeners()
+                this.recognizeStream.destroy()
+            }
+            catch (error) {
+                this.log("warning", `error during Google Speech stream cleanup: ${error}`)
+            }
+            this.recognizeStream = null
+        }
+        if (this.client !== null) {
+            try {
+                await this.client.close()
+            }
+            catch (error) {
+                this.log("warning", `error closing Google Speech client: ${error}`)
+            }
+            this.client = null
+        }
+
+        /* signal EOF to any pending read operations */
+        if (this.queue !== null) {
+            this.queue.write(null)
+            this.queue = null
+        }
+    }
+}
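The word-timing math in the new node converts the protobuf-style `{ seconds, nanos }` offsets returned by the Speech API into Luxon `Duration`s. A small standalone illustration of that conversion; the sample values are made up.

```ts
import { Duration } from "luxon"

/* convert a protobuf-style { seconds, nanos } offset into a Luxon Duration,
   mirroring the arithmetic used for each recognized word above (sketch only) */
const protoToDuration = (t?: { seconds?: number | string | null, nanos?: number | null } | null) =>
    t ? Duration.fromMillis((Number(t.seconds ?? 0) * 1000) + (Number(t.nanos ?? 0) / 1000000))
      : Duration.fromMillis(0)

/* e.g. a word starting 2.34 s into the stream */
const start = protoToDuration({ seconds: 2, nanos: 340000000 })
console.log(start.toMillis()) /* 2340 */
```

In the node itself this value is then shifted by `this.timeZeroOffset` so the chunk timestamps line up with the rest of the pipeline.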
package/speechflow-cli/src/speechflow-node-a2t-openai.ts

@@ -23,12 +23,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode
     public static name = "a2t-openai"
 
     /* internal state */
-    private openai: OpenAI
-    private ws: ws.WebSocket
-    private queue: util.SingleQueue<SpeechFlowChunk | null>
-    private resampler: SpeexResampler
-    private closing
-    private connectionTimeout: ReturnType<typeof setTimeout>
+    private openai: OpenAI | null = null
+    private ws: ws.WebSocket | null = null
+    private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
+    private resampler: SpeexResampler | null = null
+    private closing = false
+    private connectionTimeout: ReturnType<typeof setTimeout> | null = null
 
     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -150,6 +150,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode
         })
         this.ws.on("error", (err) => {
             this.log("error", `WebSocket connection error: ${err}`)
+            if (!this.closing && this.queue !== null)
+                this.queue.write(null)
+            this.emit("error", err)
         })
 
         /* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
@@ -164,6 +167,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode
             }, new Map<string, any>())
         }
 
+        /* track transcription text */
         let text = ""
         this.ws.on("message", (data) => {
            let ev: any
@@ -353,7 +357,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode
             this.ws.close()
             this.ws = null
         }
-        this.openai
+        if (this.openai !== null)
+            this.openai = null
 
         /* close resampler */
         this.resampler = null