speechflow 1.6.6 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +28 -14
- package/etc/secretlint.json +7 -0
- package/etc/speechflow.yaml +13 -4
- package/etc/stx.conf +3 -2
- package/package.json +9 -7
- package/speechflow-cli/dst/speechflow-main-api.js +3 -3
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
- package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +10 -9
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +35 -1
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +46 -17
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -2
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +16 -13
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +21 -16
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +75 -46
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +39 -39
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -2
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +173 -29
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +0 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +0 -5
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +2 -2
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +3 -3
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +7 -2
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +14 -4
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.js +1 -1
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +3 -3
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +2 -0
- package/speechflow-cli/dst/speechflow-util-misc.js +26 -0
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -2
- package/speechflow-cli/dst/speechflow-util-queue.js +33 -12
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +2 -2
- package/speechflow-cli/dst/speechflow-util-stream.js +13 -17
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +7 -1
- package/speechflow-cli/etc/stx.conf +1 -0
- package/speechflow-cli/package.json +31 -30
- package/speechflow-cli/src/speechflow-main-api.ts +3 -6
- package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
- package/speechflow-cli/src/speechflow-main-graph.ts +10 -11
- package/speechflow-cli/src/speechflow-main-status.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +57 -20
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +1 -2
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +0 -1
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +21 -16
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +24 -16
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +86 -54
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +41 -38
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -2
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +205 -33
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +0 -1
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +0 -5
- package/speechflow-cli/src/speechflow-node-xio-device.ts +2 -2
- package/speechflow-cli/src/speechflow-node-xio-file.ts +3 -3
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +7 -2
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +14 -4
- package/speechflow-cli/src/speechflow-node.ts +1 -1
- package/speechflow-cli/src/speechflow-util-audio.ts +3 -3
- package/speechflow-cli/src/speechflow-util-misc.ts +23 -0
- package/speechflow-cli/src/speechflow-util-queue.ts +36 -16
- package/speechflow-cli/src/speechflow-util-stream.ts +24 -21
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/dst/app-font-fa-brands-400.woff2 +0 -0
- package/speechflow-ui-db/dst/app-font-fa-regular-400.woff2 +0 -0
- package/speechflow-ui-db/dst/app-font-fa-solid-900.woff2 +0 -0
- package/speechflow-ui-db/dst/app-font-fa-v4compatibility.woff2 +0 -0
- package/speechflow-ui-db/dst/index.css +1 -5
- package/speechflow-ui-db/dst/index.js +17 -58
- package/speechflow-ui-db/etc/stx.conf +5 -16
- package/speechflow-ui-db/package.json +20 -19
- package/speechflow-ui-st/dst/app-font-fa-brands-400.woff2 +0 -0
- package/speechflow-ui-st/dst/app-font-fa-regular-400.woff2 +0 -0
- package/speechflow-ui-st/dst/app-font-fa-solid-900.woff2 +0 -0
- package/speechflow-ui-st/dst/app-font-fa-v4compatibility.woff2 +0 -0
- package/speechflow-ui-st/dst/index.css +1 -5
- package/speechflow-ui-st/dst/index.js +36 -79
- package/speechflow-ui-st/etc/stx.conf +5 -16
- package/speechflow-ui-st/package.json +21 -20
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js +0 -149
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.d.ts +0 -13
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js +0 -202
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js.map +0 -1
|
@@ -187,6 +187,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
187
187
|
|
|
188
188
|
/* provide Duplex stream and internally attach to Deepgram API */
|
|
189
189
|
const self = this
|
|
190
|
+
const reads = new util.PromiseSet<void>()
|
|
190
191
|
this.stream = new Stream.Duplex({
|
|
191
192
|
writableObjectMode: true,
|
|
192
193
|
readableObjectMode: true,
|
|
@@ -217,12 +218,33 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
217
218
|
callback()
|
|
218
219
|
}
|
|
219
220
|
},
|
|
221
|
+
async final (callback) {
|
|
222
|
+
/* short-circuiting in case of own closing */
|
|
223
|
+
if (self.closing || self.dg === null) {
|
|
224
|
+
callback()
|
|
225
|
+
return
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/* close Deepgram API */
|
|
229
|
+
try {
|
|
230
|
+
self.dg.requestClose()
|
|
231
|
+
}
|
|
232
|
+
catch (error) {
|
|
233
|
+
self.log("warning", `error closing Deepgram connection: ${error}`)
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/* await all read operations */
|
|
237
|
+
await reads.awaitAll()
|
|
238
|
+
|
|
239
|
+
/* NOTICE: do not push null here -- let the Deepgram close event handle it */
|
|
240
|
+
callback()
|
|
241
|
+
},
|
|
220
242
|
read (size) {
|
|
221
243
|
if (self.closing || self.queue === null) {
|
|
222
244
|
this.push(null)
|
|
223
245
|
return
|
|
224
246
|
}
|
|
225
|
-
self.queue.read().then((chunk) => {
|
|
247
|
+
reads.add(self.queue.read().then((chunk) => {
|
|
226
248
|
if (self.closing || self.queue === null) {
|
|
227
249
|
this.push(null)
|
|
228
250
|
return
|
|
@@ -238,21 +260,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
238
260
|
}).catch((error: unknown) => {
|
|
239
261
|
if (!self.closing && self.queue !== null)
|
|
240
262
|
self.log("error", `queue read error: ${util.ensureError(error).message}`)
|
|
241
|
-
})
|
|
242
|
-
},
|
|
243
|
-
final (callback) {
|
|
244
|
-
if (self.closing || self.dg === null) {
|
|
245
|
-
callback()
|
|
246
|
-
return
|
|
247
|
-
}
|
|
248
|
-
try {
|
|
249
|
-
self.dg.requestClose()
|
|
250
|
-
}
|
|
251
|
-
catch (error) {
|
|
252
|
-
self.log("warning", `error closing Deepgram connection: ${error}`)
|
|
253
|
-
}
|
|
254
|
-
/* NOTICE: do not push null here -- let the Deepgram close event handle it */
|
|
255
|
-
callback()
|
|
263
|
+
}))
|
|
256
264
|
}
|
|
257
265
|
})
|
|
258
266
|
}
|
|
@@ -9,7 +9,7 @@ import Stream from "node:stream"
|
|
|
9
9
|
|
|
10
10
|
/* external dependencies */
|
|
11
11
|
import OpenAI from "openai"
|
|
12
|
-
import { DateTime }
|
|
12
|
+
import { DateTime, Duration } from "luxon"
|
|
13
13
|
import SpeexResampler from "speex-resampler"
|
|
14
14
|
import ws from "ws"
|
|
15
15
|
|
|
@@ -23,11 +23,11 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
23
23
|
public static name = "a2t-openai"
|
|
24
24
|
|
|
25
25
|
/* internal state */
|
|
26
|
-
private openai:
|
|
27
|
-
private ws:
|
|
28
|
-
private queue:
|
|
29
|
-
private resampler:
|
|
30
|
-
private closing
|
|
26
|
+
private openai: OpenAI | null = null
|
|
27
|
+
private ws: ws.WebSocket | null = null
|
|
28
|
+
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
29
|
+
private resampler: SpeexResampler | null = null
|
|
30
|
+
private closing = false
|
|
31
31
|
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
32
32
|
|
|
33
33
|
/* construct node */
|
|
@@ -141,11 +141,25 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
141
141
|
})
|
|
142
142
|
this.ws.on("close", () => {
|
|
143
143
|
this.log("info", "WebSocket connection closed")
|
|
144
|
-
this.queue
|
|
144
|
+
if (!this.closing && this.queue !== null)
|
|
145
|
+
this.queue.write(null)
|
|
145
146
|
})
|
|
146
147
|
this.ws.on("error", (err) => {
|
|
147
148
|
this.log("error", `WebSocket connection error: ${err}`)
|
|
148
149
|
})
|
|
150
|
+
|
|
151
|
+
/* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
|
|
152
|
+
const speechTiming = new Map<string, { startMs: number, endMs: number }>()
|
|
153
|
+
|
|
154
|
+
/* helper function for aggregating meta information */
|
|
155
|
+
const aggregateMeta = (start: Duration, end: Duration): Map<string, any> => {
|
|
156
|
+
const metas = metastore.fetch(start, end)
|
|
157
|
+
return metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
|
|
158
|
+
curr.forEach((val, key) => { prev.set(key, val) })
|
|
159
|
+
return prev
|
|
160
|
+
}, new Map<string, any>())
|
|
161
|
+
}
|
|
162
|
+
|
|
149
163
|
let text = ""
|
|
150
164
|
this.ws.on("message", (data) => {
|
|
151
165
|
let ev: any
|
|
@@ -163,53 +177,63 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
163
177
|
switch (ev.type) {
|
|
164
178
|
case "transcription_session.created":
|
|
165
179
|
break
|
|
166
|
-
case "conversation.item.created":
|
|
180
|
+
case "conversation.item.created": {
|
|
167
181
|
text = ""
|
|
168
182
|
break
|
|
183
|
+
}
|
|
169
184
|
case "conversation.item.input_audio_transcription.delta": {
|
|
170
185
|
text += ev.delta as string
|
|
171
|
-
if (this.params.interim) {
|
|
172
|
-
const
|
|
173
|
-
const
|
|
174
|
-
const
|
|
175
|
-
const
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
|
|
180
|
-
chunk.meta = meta
|
|
181
|
-
this.queue!.write(chunk)
|
|
186
|
+
if (this.params.interim && !this.closing && this.queue !== null) {
|
|
187
|
+
const itemId = ev.item_id as string
|
|
188
|
+
const timing = speechTiming.get(itemId)
|
|
189
|
+
const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
|
|
190
|
+
const end = timing ? Duration.fromMillis(timing.endMs) : start
|
|
191
|
+
const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
|
|
192
|
+
chunk.meta = aggregateMeta(start, end)
|
|
193
|
+
this.queue.write(chunk)
|
|
182
194
|
}
|
|
183
195
|
break
|
|
184
196
|
}
|
|
185
197
|
case "conversation.item.input_audio_transcription.completed": {
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
198
|
+
if (!this.closing && this.queue !== null) {
|
|
199
|
+
text = ev.transcript as string
|
|
200
|
+
const itemId = ev.item_id as string
|
|
201
|
+
const timing = speechTiming.get(itemId)
|
|
202
|
+
const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
|
|
203
|
+
const end = timing ? Duration.fromMillis(timing.endMs) : start
|
|
204
|
+
const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
|
|
205
|
+
chunk.meta = aggregateMeta(start, end)
|
|
206
|
+
metastore.prune(start)
|
|
207
|
+
speechTiming.delete(itemId)
|
|
208
|
+
this.queue.write(chunk)
|
|
209
|
+
text = ""
|
|
210
|
+
}
|
|
199
211
|
break
|
|
200
212
|
}
|
|
201
|
-
case "input_audio_buffer.speech_started":
|
|
213
|
+
case "input_audio_buffer.speech_started": {
|
|
202
214
|
this.log("info", "VAD: speech started")
|
|
215
|
+
const itemId = ev.item_id as string
|
|
216
|
+
const audioStartMs = ev.audio_start_ms as number
|
|
217
|
+
speechTiming.set(itemId, { startMs: audioStartMs, endMs: audioStartMs })
|
|
203
218
|
break
|
|
204
|
-
|
|
219
|
+
}
|
|
220
|
+
case "input_audio_buffer.speech_stopped": {
|
|
205
221
|
this.log("info", "VAD: speech stopped")
|
|
222
|
+
const itemId = ev.item_id as string
|
|
223
|
+
const audioEndMs = ev.audio_end_ms as number
|
|
224
|
+
const timing = speechTiming.get(itemId)
|
|
225
|
+
if (timing)
|
|
226
|
+
timing.endMs = audioEndMs
|
|
206
227
|
break
|
|
207
|
-
|
|
228
|
+
}
|
|
229
|
+
case "input_audio_buffer.committed": {
|
|
208
230
|
this.log("info", "input buffer committed")
|
|
209
231
|
break
|
|
210
|
-
|
|
232
|
+
}
|
|
233
|
+
case "error": {
|
|
211
234
|
this.log("error", `error: ${ev.error?.message}`)
|
|
212
235
|
break
|
|
236
|
+
}
|
|
213
237
|
default:
|
|
214
238
|
break
|
|
215
239
|
}
|
|
@@ -220,6 +244,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
220
244
|
|
|
221
245
|
/* provide Duplex stream and internally attach to OpenAI API */
|
|
222
246
|
const self = this
|
|
247
|
+
const reads = new util.PromiseSet<void>()
|
|
223
248
|
this.stream = new Stream.Duplex({
|
|
224
249
|
writableObjectMode: true,
|
|
225
250
|
readableObjectMode: true,
|
|
@@ -255,12 +280,32 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
255
280
|
callback()
|
|
256
281
|
}
|
|
257
282
|
},
|
|
283
|
+
async final (callback) {
|
|
284
|
+
if (self.closing || self.ws === null) {
|
|
285
|
+
callback()
|
|
286
|
+
return
|
|
287
|
+
}
|
|
288
|
+
try {
|
|
289
|
+
sendMessage({ type: "input_audio_buffer.commit" })
|
|
290
|
+
self.ws.close()
|
|
291
|
+
await util.sleep(50)
|
|
292
|
+
}
|
|
293
|
+
catch (error) {
|
|
294
|
+
self.log("warning", `error closing OpenAI connection: ${error}`)
|
|
295
|
+
}
|
|
296
|
+
await reads.awaitAll()
|
|
297
|
+
const chunks: Array<SpeechFlowChunk | null> = self.queue?.drain() ?? []
|
|
298
|
+
for (const chunk of chunks)
|
|
299
|
+
this.push(chunk)
|
|
300
|
+
this.push(null)
|
|
301
|
+
callback()
|
|
302
|
+
},
|
|
258
303
|
read (size) {
|
|
259
304
|
if (self.closing || self.queue === null) {
|
|
260
305
|
this.push(null)
|
|
261
306
|
return
|
|
262
307
|
}
|
|
263
|
-
self.queue.read().then((chunk) => {
|
|
308
|
+
reads.add(self.queue.read().then((chunk) => {
|
|
264
309
|
if (self.closing || self.queue === null) {
|
|
265
310
|
this.push(null)
|
|
266
311
|
return
|
|
@@ -276,23 +321,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
276
321
|
}).catch((error: unknown) => {
|
|
277
322
|
if (!self.closing && self.queue !== null)
|
|
278
323
|
self.log("error", `queue read error: ${util.ensureError(error).message}`)
|
|
279
|
-
})
|
|
280
|
-
},
|
|
281
|
-
final (callback) {
|
|
282
|
-
if (self.closing || self.ws === null) {
|
|
283
|
-
callback()
|
|
284
|
-
return
|
|
285
|
-
}
|
|
286
|
-
try {
|
|
287
|
-
sendMessage({ type: "input_audio_buffer.commit" })
|
|
288
|
-
self.ws.close()
|
|
289
|
-
/* NOTICE: do not push null here -- let the OpenAI close event handle it */
|
|
290
|
-
callback()
|
|
291
|
-
}
|
|
292
|
-
catch (error) {
|
|
293
|
-
self.log("warning", `error closing OpenAI connection: ${error}`)
|
|
294
|
-
callback(util.ensureError(error, "failed to close OpenAI connection"))
|
|
295
|
-
}
|
|
324
|
+
}))
|
|
296
325
|
}
|
|
297
326
|
})
|
|
298
327
|
}
|
|
@@ -316,11 +345,14 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
316
345
|
|
|
317
346
|
/* close OpenAI connection */
|
|
318
347
|
if (this.ws !== null) {
|
|
348
|
+
this.ws.removeAllListeners()
|
|
319
349
|
this.ws.close()
|
|
320
350
|
this.ws = null
|
|
321
351
|
}
|
|
322
352
|
if (this.openai !== null)
|
|
323
353
|
this.openai = null
|
|
354
|
+
if (this.resampler !== null)
|
|
355
|
+
this.resampler = null
|
|
324
356
|
|
|
325
357
|
/* shutdown stream */
|
|
326
358
|
if (this.stream !== null) {
|
|
@@ -10,6 +10,7 @@ import Stream from "node:stream"
|
|
|
10
10
|
/* external dependencies */
|
|
11
11
|
import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
|
|
12
12
|
import { getStreamAsBuffer } from "get-stream"
|
|
13
|
+
import { Duration } from "luxon"
|
|
13
14
|
import SpeexResampler from "speex-resampler"
|
|
14
15
|
|
|
15
16
|
/* internal dependencies */
|
|
@@ -140,58 +141,60 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
140
141
|
readableObjectMode: true,
|
|
141
142
|
decodeStrings: false,
|
|
142
143
|
highWaterMark: 1,
|
|
143
|
-
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
144
|
+
async transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
144
145
|
if (self.closing)
|
|
145
146
|
callback(new Error("stream already destroyed"))
|
|
146
147
|
else if (Buffer.isBuffer(chunk.payload))
|
|
147
148
|
callback(new Error("invalid chunk payload type"))
|
|
148
149
|
else {
|
|
149
|
-
|
|
150
|
-
|
|
150
|
+
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
151
|
+
processTimeout = null
|
|
152
|
+
callback(new Error("ElevenLabs API timeout"))
|
|
153
|
+
}, 60 * 1000)
|
|
154
|
+
const clearProcessTimeout = () => {
|
|
155
|
+
if (processTimeout !== null) {
|
|
156
|
+
clearTimeout(processTimeout)
|
|
151
157
|
processTimeout = null
|
|
152
|
-
callback(new Error("ElevenLabs API timeout"))
|
|
153
|
-
}, 60 * 1000)
|
|
154
|
-
const clearProcessTimeout = () => {
|
|
155
|
-
if (processTimeout !== null) {
|
|
156
|
-
clearTimeout(processTimeout)
|
|
157
|
-
processTimeout = null
|
|
158
|
-
}
|
|
159
158
|
}
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
callback(new Error("stream destroyed during processing"))
|
|
164
|
-
return
|
|
165
|
-
}
|
|
166
|
-
const stream = await speechStream(chunk.payload as string)
|
|
167
|
-
const buffer = await getStreamAsBuffer(stream)
|
|
168
|
-
if (self.closing) {
|
|
169
|
-
clearProcessTimeout()
|
|
170
|
-
callback(new Error("stream destroyed during processing"))
|
|
171
|
-
return
|
|
172
|
-
}
|
|
173
|
-
const bufferResampled = self.resampler!.processChunk(buffer)
|
|
174
|
-
self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
|
|
175
|
-
const chunkNew = chunk.clone()
|
|
176
|
-
chunkNew.type = "audio"
|
|
177
|
-
chunkNew.payload = bufferResampled
|
|
159
|
+
}
|
|
160
|
+
try {
|
|
161
|
+
if (self.closing) {
|
|
178
162
|
clearProcessTimeout()
|
|
179
|
-
|
|
180
|
-
|
|
163
|
+
callback(new Error("stream destroyed during processing"))
|
|
164
|
+
return
|
|
181
165
|
}
|
|
182
|
-
|
|
166
|
+
const stream = await speechStream(chunk.payload as string)
|
|
167
|
+
const buffer = await getStreamAsBuffer(stream)
|
|
168
|
+
if (self.closing) {
|
|
183
169
|
clearProcessTimeout()
|
|
184
|
-
callback(
|
|
170
|
+
callback(new Error("stream destroyed during processing"))
|
|
171
|
+
return
|
|
185
172
|
}
|
|
186
|
-
|
|
173
|
+
self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
|
|
174
|
+
const bufferResampled = self.resampler!.processChunk(buffer)
|
|
175
|
+
self.log("info", "ElevenLabs: forwarding resampled audio " +
|
|
176
|
+
`(buffer length: ${bufferResampled.byteLength})`)
|
|
177
|
+
|
|
178
|
+
/* calculate actual audio duration from PCM buffer size */
|
|
179
|
+
const durationMs = util.audioBufferDuration(bufferResampled,
|
|
180
|
+
self.config.audioSampleRate, self.config.audioBitDepth) * 1000
|
|
181
|
+
|
|
182
|
+
/* create new chunk with recalculated timestamps */
|
|
183
|
+
const chunkNew = chunk.clone()
|
|
184
|
+
chunkNew.type = "audio"
|
|
185
|
+
chunkNew.payload = bufferResampled
|
|
186
|
+
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
187
|
+
clearProcessTimeout()
|
|
188
|
+
this.push(chunkNew)
|
|
189
|
+
callback()
|
|
190
|
+
}
|
|
191
|
+
catch (error) {
|
|
192
|
+
clearProcessTimeout()
|
|
193
|
+
callback(util.ensureError(error, "ElevenLabs processing failed"))
|
|
194
|
+
}
|
|
187
195
|
}
|
|
188
196
|
},
|
|
189
197
|
final (callback) {
|
|
190
|
-
if (self.closing) {
|
|
191
|
-
callback()
|
|
192
|
-
return
|
|
193
|
-
}
|
|
194
|
-
this.push(null)
|
|
195
198
|
callback()
|
|
196
199
|
}
|
|
197
200
|
})
|
|
@@ -98,7 +98,7 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
|
|
|
98
98
|
if (!retriable || attempt >= maxRetries)
|
|
99
99
|
break
|
|
100
100
|
const delayMs = Math.min(1000 * Math.pow(2, attempt - 1), 5000)
|
|
101
|
-
await
|
|
101
|
+
await util.sleep(delayMs)
|
|
102
102
|
}
|
|
103
103
|
}
|
|
104
104
|
throw util.ensureError(lastError)
|
|
@@ -129,7 +129,6 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
|
|
|
129
129
|
}
|
|
130
130
|
},
|
|
131
131
|
final (callback) {
|
|
132
|
-
this.push(null)
|
|
133
132
|
callback()
|
|
134
133
|
}
|
|
135
134
|
})
|